mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-20 07:20:10 +01:00
util/cache_ops: Add some cache flush helpers
The x86 implementation was shamelessly stolen from intel_mem.c and the aarch64 implementation was based on the code in Turnip. Reviewed-by: Boris Brezillon <boris.brezillon@collabora.com> Tested-by: Boris Brezillon <boris.brezillon@collabora.com> Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com> Reviewed-by: Danylo Piliaiev <dpiliaiev@igalia.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/37803>
This commit is contained in:
parent
1dea86f773
commit
555881e574
6 changed files with 611 additions and 1 deletions
115
src/util/cache_ops.h
Normal file
115
src/util/cache_ops.h
Normal file
|
|
@ -0,0 +1,115 @@
|
|||
/*
|
||||
* Copyright © 2025 Collabora Ltd.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
* IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef UTIL_CACHE_OPS_H
|
||||
#define UTIL_CACHE_OPS_H
|
||||
|
||||
#include <stdbool.h>
|
||||
#include <stddef.h>
|
||||
|
||||
#include "detect_arch.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/** Returns true if we have cache operations available */
|
||||
static inline bool
|
||||
util_has_cache_ops(void)
|
||||
{
|
||||
/* TODO: Port to MSVC if and when we have Windows hardware drivers that
|
||||
* need cache flushing ops.
|
||||
*/
|
||||
#if defined(_MSC_VER)
|
||||
return false;
|
||||
#endif
|
||||
|
||||
return DETECT_ARCH_X86 || DETECT_ARCH_X86_64 || DETECT_ARCH_AARCH64;
|
||||
}
|
||||
|
||||
/** Returns the cache granularity
|
||||
*
|
||||
* This is the maximum number of bytes that may be overwritten as the result
|
||||
* of a cache flush or cache line eviction. On big.LITTLE platforms, the
|
||||
* cache flush helpers may sometimes operate at a smaller granularity but may
|
||||
* also round up to at most util_cache_granularity().
|
||||
*
|
||||
* Vulkan drivers should return this as nonCoherentAtomSize.
|
||||
*/
|
||||
size_t util_cache_granularity(void);
|
||||
|
||||
/** Flushes a range to main memory */
|
||||
void util_flush_range(void *start, size_t size);
|
||||
|
||||
/** Flushes a range to main memory and invalidates those cache lines */
|
||||
void util_flush_inval_range(void *start, size_t size);
|
||||
|
||||
/** Flushes a range to main memory without fencing
|
||||
*
|
||||
* This is for the case where you have a lot of ranges to flush and want to
|
||||
* avoid unnecessary fencing. In this case, call
|
||||
*
|
||||
* util_pre_flush_fence()
|
||||
* util_flush_range_no_fence()
|
||||
* util_flush_range_no_fence()
|
||||
* util_post_flush_fence()
|
||||
*/
|
||||
void util_flush_range_no_fence(void *start, size_t size);
|
||||
|
||||
/** Flushes a range to main memory and invalidates those cache lines without
|
||||
* fencing
|
||||
*
|
||||
* This is for the case where you have a lot of ranges to flush and invalidate
|
||||
* and want to avoid unnecessary fencing. In this case, call
|
||||
*
|
||||
* util_pre_flush_fence()
|
||||
* util_flush_inval_range_no_fence()
|
||||
* util_flush_range_no_fence()
|
||||
* util_flush_inval_range_no_fence()
|
||||
* util_post_flush_inval_fence()
|
||||
*/
|
||||
void util_flush_inval_range_no_fence(void *start, size_t size);
|
||||
|
||||
/** Fence between memory access and cache flush operations
|
||||
*
|
||||
* see util_flush_range_no_fence()
|
||||
*/
|
||||
void util_pre_flush_fence(void);
|
||||
|
||||
/** Fence between cache flush operations and memory access
|
||||
*
|
||||
* see util_flush_range_no_fence()
|
||||
*/
|
||||
void util_post_flush_fence(void);
|
||||
|
||||
/** Fence between cache invalidate operations and memory access
|
||||
*
|
||||
* see util_flush_inval_range_no_fence()
|
||||
*/
|
||||
void util_post_flush_inval_fence(void);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* UTIL_CACHE_OPS_H */
|
||||
228
src/util/cache_ops_aarch64.c
Normal file
228
src/util/cache_ops_aarch64.c
Normal file
|
|
@ -0,0 +1,228 @@
|
|||
/*
|
||||
* Copyright © 2025 Collabora Ltd. and Igalia S.L.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
* IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "cache_ops.h"
|
||||
|
||||
#include "util/macros.h"
|
||||
#include "util/u_atomic.h"
|
||||
|
||||
/* Reads the aarch64 CTR_EL0 (Cache Type Register), which describes the
 * CPU's cache line geometry (see the CWG/DminLine users below).
 */
static uint32_t
get_ctr_el0(void)
{
   uint32_t ctr_el0;
   __asm("mrs\t%x0, ctr_el0" : "=r"(ctr_el0));
   return ctr_el0;
}
|
||||
|
||||
/* Extracts the CWG (Cache Write-back Granule) field, bits [27:24] of
 * CTR_EL0.  See util_cache_granularity() for the meaning of this field.
 */
static uint32_t
get_ctr_cwg(void)
{
   return (get_ctr_el0() >> 24) & 0xf;
}
|
||||
|
||||
size_t
util_cache_granularity(void)
{
   /* Cached so we only have to read CTR_EL0 once; 0 means "not read yet". */
   static uint32_t cached_size = 0;
   uint32_t size = p_atomic_read(&cached_size);
   if (likely(size > 0))
      return size;

   /* We use CTR_EL0.CWG as the cache granularity. According to Arm:
    *
    *    "CWG, [27:24]
    *
    *    Cache write-back granule. Log2 of the number of words of the maximum
    *    size of memory that can be overwritten as a result of the eviction of
    *    a cache entry that has had a memory location in it modified"
    *
    * On big.LITTLE CPUs, Linux will trap on fetching CTR_EL0 and take the
    * maximum across all CPU cores so this should really be the maximum that
    * drivers and clients can assume.
    *
    * Use the get_ctr_cwg() helper instead of open-coding the bit extraction
    * (the original open-coded it, leaving the helper unused).
    */
   size = 4 << get_ctr_cwg();

   p_atomic_set(&cached_size, size);
   return size;
}
|
||||
|
||||
/* Returns the cache-line walk step in bytes: 4 bytes per "word" times
 * 2^CTR_EL0.DminLine.
 */
static size_t
get_dmin_line(void)
{
   /* Cached so we only have to read CTR_EL0 once; 0 means "not read yet". */
   static uint32_t cached_size = 0;
   uint32_t size = p_atomic_read(&cached_size);
   if (likely(size > 0))
      return size;

   /* For walking cache lines, we want to use CTR_EL0.DminLine as the step
    * size. According to Arm:
    *
    *    "DminLine, [19:16]
    *
    *    Log2 of the number of words in the smallest cache line of all the
    *    data and unified caches that the core controls"
    *
    * On big.LITTLE CPUs, Linux will trap on fetching CTR_EL0 and take the
    * minimum across all CPU cores so this should be safe no matter what core
    * we happen to be living on.
    */
   size = 4 << ((get_ctr_el0() >> 16) & 0xf);

   p_atomic_set(&cached_size, size);
   return size;
}
|
||||
|
||||
/* Cleans (writes back) the data-cache line containing p.
 *
 * NOTE(review): p is marked UNUSED but is in fact consumed by the asm
 * operand; the annotation looks unnecessary — confirm before removing.
 */
static void
flush_l1_cacheline(UNUSED void *p)
{
   /* Clean data cache. */
   __asm volatile("dc cvac, %0" : : "r" (p) : "memory");
}
|
||||
|
||||
/* Cleans and invalidates the data-cache line containing p.
 *
 * NOTE(review): p is marked UNUSED but is in fact consumed by the asm
 * operand; the annotation looks unnecessary — confirm before removing.
 */
static void
flush_inval_l1_cacheline(UNUSED void *p)
{
   /* Clean and Invalidate data cache, there is no separate Invalidate. */
   __asm volatile("dc civac, %0" : : "r" (p) : "memory");
}
|
||||
|
||||
/* Full-system data synchronization barrier (DSB SY).
 *
 * The "memory" clobber makes this a compiler barrier as well: without it,
 * the compiler is free to move loads/stores across the asm statement, which
 * would defeat the fence (asm volatile alone only prevents deletion and
 * reordering relative to other volatile asm).
 */
static void
data_sync_bar(void)
{
   __asm volatile("dsb sy" : : : "memory");
}
|
||||
|
||||
/* Cleans every cache line overlapping [start, start + size), no fencing. */
void
util_flush_range_no_fence(void *start, size_t size)
{
   const uintptr_t line = get_dmin_line();
   const char *end = (const char *) start + size;

   /* Round down to a line boundary, then clean one line at a time. */
   for (char *p = (char *) ((uintptr_t) start & ~(line - 1)); p < end;
        p += line)
      flush_l1_cacheline(p);
}
|
||||
|
||||
/* Cleans and invalidates every cache line overlapping [start, start + size),
 * no fencing.
 */
void
util_flush_inval_range_no_fence(void *start, size_t size)
{
   const uintptr_t line = get_dmin_line();
   const char *end = (const char *) start + size;

   /* Round down to a line boundary, then clean+invalidate line by line. */
   for (char *p = (char *) ((uintptr_t) start & ~(line - 1)); p < end;
        p += line)
      flush_inval_l1_cacheline(p);
}
|
||||
|
||||
/* Fenced flush; see the *_no_fence variants in cache_ops.h for batching. */
void
util_flush_range(void *p, size_t size)
{
   if (size != 0) {
      util_pre_flush_fence();
      util_flush_range_no_fence(p, size);
      util_post_flush_fence();
   }
}
|
||||
|
||||
/* Fenced flush + invalidate; see the *_no_fence variants in cache_ops.h. */
void
util_flush_inval_range(void *p, size_t size)
{
   if (size != 0) {
      util_pre_flush_fence();
      util_flush_inval_range_no_fence(p, size);
      util_post_flush_inval_fence();
   }
}
|
||||
|
||||
void
util_pre_flush_fence(void)
{
   /* Intentionally a no-op on aarch64. */
   /* From the Arm ® Architecture Reference Manual (revision L.b):
    *
    *    "All data cache instructions, other than DC ZVA, DC GVA, and DC GZVA
    *    that specify an address: [...] Execute in program order relative to
    *    other data cache instructions, other than DC ZVA, DC GVA, and DC GZVA
    *    that specify an address within the same cache line of minimum size,
    *    as indicated by CTR_EL0.DMinLine."
    *
    * So cache flush operations are properly ordered against memory accesses
    * and there's nothing we need to do to ensure that prior writes land
    * before the cache flush operations flush the data.
    *
    * In the case where this pre_flush_fence() is called before a flush/inval
    * used for a GPU -> CPU barrier, there is also nothing to do because it's
    * the responsibility of the GPU to ensure that all memory writes have
    * landed before we see this on the CPU side.
    */
}
|
||||
|
||||
void
util_post_flush_fence(void)
{
   /* From the Arm ® Architecture Reference Manual (revision L.b):
    *
    *    "A cache maintenance instruction can complete at any time after it is
    *    executed, but is only guaranteed to be complete, and its effects
    *    visible to other observers, following a DSB instruction executed by
    *    the PE that executed the cache maintenance instruction."
    *
    * In order to ensure that the GPU sees data flushed by prior cache
    * flushes, we need to execute a DSB to ensure the flushes land.
    */
   data_sync_bar();
}
|
||||
|
||||
/* Fence after flush/invalidate ops, before subsequent memory accesses. */
void
util_post_flush_inval_fence(void)
{
   /* From the Arm ® Architecture Reference Manual (revision L.b):
    *
    *    "All data cache instructions, other than DC ZVA, DC GVA, and DC GZVA
    *    that specify an address: [...] Execute in program order relative to
    *    other data cache instructions, other than DC ZVA, DC GVA, and DC GZVA
    *    that specify an address within the same cache line of minimum size,
    *    as indicated by CTR_EL0.DMinLine."
    *
    * This seems to imply that memory access that happens after the cache
    * flush/invalidate operation would be properly ordered with respect to it.
    * However, the manual also says:
    *
    *    "A cache maintenance instruction can complete at any time after it is
    *    executed, but is only guaranteed to be complete, and its effects
    *    visible to other observers, following a DSB instruction executed by
    *    the PE that executed the cache maintenance instruction."
    *
    * In practice, it appears that the ordering guarantees only really apply
    * to the queue order in the data cache and not the order in which
    * operations complete. In other words, a read which is queued after the
    * invalidate may still use the stale cache line unless we explicitly
    * insert a DSB between them.
    */
   data_sync_bar();
}
|
||||
70
src/util/cache_ops_null.c
Normal file
70
src/util/cache_ops_null.c
Normal file
|
|
@ -0,0 +1,70 @@
|
|||
/*
|
||||
* Copyright © 2025 Collabora Ltd.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
* IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "cache_ops.h"
|
||||
#include "util/macros.h"
|
||||
|
||||
/* No cache ops on this platform, so there is no meaningful granularity;
 * returns 0.  Callers are expected to check util_has_cache_ops() first.
 *
 * Use a (void) parameter list to match the prototype in cache_ops.h
 * (the original used an old-style empty parameter list).
 */
size_t
util_cache_granularity(void)
{
   return 0;
}
|
||||
|
||||
/* Stub: util_has_cache_ops() is false on platforms that build this file. */
void
util_flush_range(void *start, size_t size)
{
   UNREACHABLE("Cache ops are not implemented on this platform");
}
|
||||
|
||||
/* Stub: util_has_cache_ops() is false on platforms that build this file. */
void
util_flush_inval_range(void *start, size_t size)
{
   UNREACHABLE("Cache ops are not implemented on this platform");
}
|
||||
|
||||
/* Stub: util_has_cache_ops() is false on platforms that build this file. */
void
util_flush_range_no_fence(void *start, size_t size)
{
   UNREACHABLE("Cache ops are not implemented on this platform");
}
|
||||
|
||||
/* Stub: util_has_cache_ops() is false on platforms that build this file. */
void
util_flush_inval_range_no_fence(void *start, size_t size)
{
   UNREACHABLE("Cache ops are not implemented on this platform");
}
|
||||
|
||||
/* Stub: util_has_cache_ops() is false on platforms that build this file.
 * Return type moved to its own line for consistency with the rest of the
 * file.
 */
void
util_pre_flush_fence(void)
{
   UNREACHABLE("Cache ops are not implemented on this platform");
}
|
||||
|
||||
/* Stub: util_has_cache_ops() is false on platforms that build this file.
 * Return type moved to its own line for consistency with the rest of the
 * file.
 */
void
util_post_flush_fence(void)
{
   UNREACHABLE("Cache ops are not implemented on this platform");
}
|
||||
|
||||
/* Stub: util_has_cache_ops() is false on platforms that build this file.
 * Return type moved to its own line for consistency with the rest of the
 * file.
 */
void
util_post_flush_inval_fence(void)
{
   UNREACHABLE("Cache ops are not implemented on this platform");
}
|
||||
129
src/util/cache_ops_x86.c
Normal file
129
src/util/cache_ops_x86.c
Normal file
|
|
@ -0,0 +1,129 @@
|
|||
/*
|
||||
* Copyright © 2017 Intel Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
* IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "cache_ops.h"
|
||||
#include "u_cpu_detect.h"
|
||||
|
||||
#define CACHELINE_SIZE 64
|
||||
#define CACHELINE_MASK 63
|
||||
|
||||
/* Cache line size as detected by u_cpu_detect. */
size_t
util_cache_granularity(void)
{
   return util_get_cpu_caps()->cacheline;
}
|
||||
|
||||
/* Defined in cache_ops_x86_clflushopt.c */
|
||||
#ifdef HAVE___BUILTIN_IA32_CLFLUSHOPT
|
||||
void util_clflushopt_range(void *start, size_t size);
|
||||
#endif
|
||||
|
||||
static void
|
||||
util_clflush_range(void *start, size_t size)
|
||||
{
|
||||
char *p = (char *) (((uintptr_t) start) & ~CACHELINE_MASK);
|
||||
char *end = ((char *) start) + size;
|
||||
|
||||
while (p < end) {
|
||||
__builtin_ia32_clflush(p);
|
||||
p += CACHELINE_SIZE;
|
||||
}
|
||||
}
|
||||
|
||||
/* Flushes the range with no fencing; uses clflushopt when both the compiler
 * and the CPU support it, otherwise falls back to plain clflush.
 */
void
util_flush_range_no_fence(void *start, size_t size)
{
#ifdef HAVE___BUILTIN_IA32_CLFLUSHOPT
   if (util_get_cpu_caps()->has_clflushopt) {
      util_clflushopt_range(start, size);
      return;
   }
#endif
   util_clflush_range(start, size);
}
|
||||
|
||||
void
util_flush_range(void *start, size_t size)
{
   __builtin_ia32_mfence();
   /* Go through the _no_fence helper so clflushopt is actually used when
    * available.  The original called util_clflush_range() directly, which
    * never used clflushopt and made the conditional mfence below dead.
    */
   util_flush_range_no_fence(start, size);
#ifdef HAVE___BUILTIN_IA32_CLFLUSHOPT
   /* clflushopt doesn't include an mfence like clflush */
   if (util_get_cpu_caps()->has_clflushopt)
      __builtin_ia32_mfence();
#endif
}
|
||||
|
||||
/* Flush + invalidate without the trailing fence; see cache_ops.h for the
 * intended pre/post fence call pattern.
 */
void
util_flush_inval_range_no_fence(void *start, size_t size)
{
   if (size == 0)
      return;

   util_flush_range_no_fence(start, size);

   /* Modern Atom CPUs (Baytrail+) have issues with clflush serialization,
    * where mfence is not a sufficient synchronization barrier. We must
    * double clflush the last cacheline. This guarantees it will be ordered
    * after the preceding clflushes, and then the mfence guards against
    * prefetches crossing the clflush boundary.
    *
    * See kernel commit 396f5d62d1a5fd99421855a08ffdef8edb43c76e
    * ("drm: Restore double clflush on the last partial cacheline")
    * and https://bugs.freedesktop.org/show_bug.cgi?id=92845.
    */
#ifdef HAVE___BUILTIN_IA32_CLFLUSHOPT
   if (util_get_cpu_caps()->has_clflushopt) {
      /* clflushopt doesn't include an mfence like clflush */
      __builtin_ia32_mfence();
      util_clflushopt_range((char *)start + size - 1, 1);
      return;
   }
#endif
   __builtin_ia32_clflush((char *)start + size - 1);
}
|
||||
|
||||
/* Fenced flush + invalidate; see cache_ops.h. */
void
util_flush_inval_range(void *start, size_t size)
{
   util_flush_inval_range_no_fence(start, size);
   __builtin_ia32_mfence();
}
|
||||
|
||||
/* Fence between memory accesses and subsequent cache flush operations. */
void
util_pre_flush_fence(void)
{
   __builtin_ia32_mfence();
}
|
||||
|
||||
/* Fence between cache flush operations and subsequent memory accesses. */
void
util_post_flush_fence(void)
{
   __builtin_ia32_mfence();
}
|
||||
|
||||
/* Fence between cache invalidate operations and subsequent memory accesses. */
void
util_post_flush_inval_fence(void)
{
   __builtin_ia32_mfence();
}
|
||||
46
src/util/cache_ops_x86_clflushopt.c
Normal file
46
src/util/cache_ops_x86_clflushopt.c
Normal file
|
|
@ -0,0 +1,46 @@
|
|||
/*
|
||||
* Copyright © 2017 Intel Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
* IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "util/u_cpu_detect.h"
|
||||
|
||||
#ifndef HAVE___BUILTIN_IA32_CLFLUSHOPT
|
||||
#error "Compiler doesn't support clflushopt!"
|
||||
#endif
|
||||
|
||||
void util_clflushopt_range(void *start, size_t size);
|
||||
|
||||
void
|
||||
util_clflushopt_range(void *start, size_t size)
|
||||
{
|
||||
const struct util_cpu_caps_t *cpu_caps = util_get_cpu_caps();
|
||||
assert(cpu_caps->has_clflushopt);
|
||||
assert(cpu_caps->cacheline > 0);
|
||||
void *p = (void *) (((uintptr_t) start) &
|
||||
~((uintptr_t)cpu_caps->cacheline - 1));
|
||||
void *end = start + size;
|
||||
|
||||
while (p < end) {
|
||||
__builtin_ia32_clflushopt(p);
|
||||
p += cpu_caps->cacheline;
|
||||
}
|
||||
}
|
||||
|
|
@ -25,6 +25,7 @@ files_mesa_util = files(
|
|||
'box.h',
|
||||
'build_id.c',
|
||||
'build_id.h',
|
||||
'cache_ops.h',
|
||||
'cnd_monotonic.c',
|
||||
'cnd_monotonic.h',
|
||||
'compiler.h',
|
||||
|
|
@ -182,6 +183,26 @@ files_mesa_util = files(
|
|||
'mesa_cache_db_multipart.h',
|
||||
)
|
||||
|
||||
libmesa_util_links = []
|
||||
|
||||
if host_machine.cpu_family() == 'aarch64' and cc.get_id() != 'msvc'
|
||||
files_mesa_util += files('cache_ops_aarch64.c')
|
||||
elif host_machine.cpu_family() in ['x86', 'x86_64'] and cc.get_id() != 'msvc'
|
||||
files_mesa_util += files('cache_ops_x86.c')
|
||||
if with_clflushopt
|
||||
libmesa_util_clflushopt = static_library(
|
||||
'mesa_util_clflushopt',
|
||||
['cache_ops_x86_clflushopt.c'],
|
||||
include_directories : [inc_util],
|
||||
c_args : [no_override_init_args] + clflushopt_args,
|
||||
gnu_symbol_visibility : 'hidden',
|
||||
)
|
||||
libmesa_util_links += libmesa_util_clflushopt
|
||||
endif
|
||||
else
|
||||
files_mesa_util += files('cache_ops_null.c')
|
||||
endif
|
||||
|
||||
files_drirc = files('00-mesa-defaults.conf')
|
||||
|
||||
if with_amd_vk
|
||||
|
|
@ -304,13 +325,14 @@ libmesa_util_simd = static_library(
|
|||
gnu_symbol_visibility : 'hidden',
|
||||
build_by_default : false,
|
||||
)
|
||||
libmesa_util_links += libmesa_util_simd
|
||||
|
||||
_libmesa_util = static_library(
|
||||
'mesa_util',
|
||||
[files_mesa_util, files_debug_stack, format_srgb],
|
||||
include_directories : [inc_util, include_directories('format')],
|
||||
dependencies : deps_for_libmesa_util,
|
||||
link_with: [libmesa_util_simd],
|
||||
link_with: libmesa_util_links,
|
||||
c_args : [c_msvc_compat_args],
|
||||
gnu_symbol_visibility : 'hidden',
|
||||
build_by_default : false
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue