mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-24 21:50:12 +01:00
i965: Revert recent tiled memcpy changes.
This reverts commit 79fe00efb4. This reverts commit f5e8b13f78. This reverts commit d21c086d81. They broke the Android build and I'd rather not leave it broken for the long holiday weekend.
This commit is contained in:
parent
79fe00efb4
commit
58fb613a51
5 changed files with 9 additions and 186 deletions
|
|
@ -92,14 +92,8 @@ libi965_gen11_la_CFLAGS = $(AM_CFLAGS) -DGEN_VERSIONx10=110
|
|||
|
||||
noinst_LTLIBRARIES = \
|
||||
libi965_dri.la \
|
||||
libintel_tiled_memcpy.la \
|
||||
$(I965_PERGEN_LIBS)
|
||||
|
||||
libintel_tiled_memcpy_la_SOURCES = \
|
||||
$(intel_tiled_memcpy_FILES)
|
||||
libintel_tiled_memcpy_la_CFLAGS = \
|
||||
$(AM_CFLAGS) $(SSE41_CFLAGS)
|
||||
|
||||
libi965_dri_la_SOURCES = \
|
||||
$(i965_FILES) \
|
||||
$(i965_oa_GENERATED_FILES)
|
||||
|
|
@ -110,7 +104,6 @@ libi965_dri_la_LIBADD = \
|
|||
$(top_builddir)/src/intel/compiler/libintel_compiler.la \
|
||||
$(top_builddir)/src/intel/blorp/libblorp.la \
|
||||
$(I965_PERGEN_LIBS) \
|
||||
libintel_tiled_memcpy.la
|
||||
$(LIBDRM_LIBS)
|
||||
|
||||
BUILT_SOURCES = $(i965_oa_GENERATED_FILES)
|
||||
|
|
|
|||
|
|
@ -110,13 +110,11 @@ i965_FILES = \
|
|||
intel_tex_image.c \
|
||||
intel_tex_obj.h \
|
||||
intel_tex_validate.c \
|
||||
intel_tiled_memcpy.c \
|
||||
intel_tiled_memcpy.h \
|
||||
intel_upload.c \
|
||||
libdrm_macros.h
|
||||
|
||||
intel_tiled_memcpy_FILES = \
|
||||
intel_tiled_memcpy.c \
|
||||
intel_tiled_memcpy.h
|
||||
|
||||
i965_gen4_FILES = \
|
||||
genX_blorp_exec.c \
|
||||
genX_state_upload.c
|
||||
|
|
|
|||
|
|
@ -31,7 +31,6 @@
|
|||
#include "intel_image.h"
|
||||
#include "intel_mipmap_tree.h"
|
||||
#include "intel_tex.h"
|
||||
#include "intel_tiled_memcpy.h"
|
||||
#include "intel_blit.h"
|
||||
#include "intel_fbo.h"
|
||||
|
||||
|
|
@ -3024,7 +3023,7 @@ intel_miptree_unmap_raw(struct intel_mipmap_tree *mt)
|
|||
}
|
||||
|
||||
static void
|
||||
intel_miptree_unmap_map(struct brw_context *brw,
|
||||
intel_miptree_unmap_gtt(struct brw_context *brw,
|
||||
struct intel_mipmap_tree *mt,
|
||||
struct intel_miptree_map *map,
|
||||
unsigned int level, unsigned int slice)
|
||||
|
|
@ -3033,7 +3032,7 @@ intel_miptree_unmap_map(struct brw_context *brw,
|
|||
}
|
||||
|
||||
static void
|
||||
intel_miptree_map_map(struct brw_context *brw,
|
||||
intel_miptree_map_gtt(struct brw_context *brw,
|
||||
struct intel_mipmap_tree *mt,
|
||||
struct intel_miptree_map *map,
|
||||
unsigned int level, unsigned int slice)
|
||||
|
|
@ -3081,7 +3080,7 @@ intel_miptree_map_map(struct brw_context *brw,
|
|||
mt, _mesa_get_format_name(mt->format),
|
||||
x, y, map->ptr, map->stride);
|
||||
|
||||
map->unmap = intel_miptree_unmap_map;
|
||||
map->unmap = intel_miptree_unmap_gtt;
|
||||
}
|
||||
|
||||
static void
|
||||
|
|
@ -3113,94 +3112,6 @@ intel_miptree_unmap_blit(struct brw_context *brw,
|
|||
intel_miptree_release(&map->linear_mt);
|
||||
}
|
||||
|
||||
/* Compute extent parameters for use with tiled_memcpy functions.
|
||||
* xs are in units of bytes and ys are in units of strides.
|
||||
*/
|
||||
static inline void
|
||||
tile_extents(struct intel_mipmap_tree *mt, struct intel_miptree_map *map,
|
||||
unsigned int level, unsigned int slice, unsigned int *x1_B,
|
||||
unsigned int *x2_B, unsigned int *y1_el, unsigned int *y2_el)
|
||||
{
|
||||
unsigned int block_width, block_height;
|
||||
unsigned int x0_el, y0_el;
|
||||
|
||||
_mesa_get_format_block_size(mt->format, &block_width, &block_height);
|
||||
|
||||
assert(map->x % block_width == 0);
|
||||
assert(map->y % block_height == 0);
|
||||
|
||||
intel_miptree_get_image_offset(mt, level, slice, &x0_el, &y0_el);
|
||||
*x1_B = (map->x / block_width + x0_el) * mt->cpp;
|
||||
*y1_el = map->y / block_height + y0_el;
|
||||
*x2_B = (DIV_ROUND_UP(map->x + map->w, block_width) + x0_el) * mt->cpp;
|
||||
*y2_el = DIV_ROUND_UP(map->y + map->h, block_height) + y0_el;
|
||||
}
|
||||
|
||||
static void
|
||||
intel_miptree_unmap_tiled_memcpy(struct brw_context *brw,
|
||||
struct intel_mipmap_tree *mt,
|
||||
struct intel_miptree_map *map,
|
||||
unsigned int level,
|
||||
unsigned int slice)
|
||||
{
|
||||
if (map->mode & GL_MAP_WRITE_BIT) {
|
||||
unsigned int x1, x2, y1, y2;
|
||||
tile_extents(mt, map, level, slice, &x1, &x2, &y1, &y2);
|
||||
|
||||
char *dst = intel_miptree_map_raw(brw, mt, map->mode | MAP_RAW);
|
||||
dst += mt->offset;
|
||||
|
||||
linear_to_tiled(x1, x2, y1, y2, dst, map->ptr, mt->surf.row_pitch,
|
||||
map->stride, brw->has_swizzling, mt->surf.tiling, memcpy);
|
||||
|
||||
intel_miptree_unmap_raw(mt);
|
||||
}
|
||||
_mesa_align_free(map->buffer);
|
||||
map->buffer = map->ptr = NULL;
|
||||
}
|
||||
|
||||
static void
|
||||
intel_miptree_map_tiled_memcpy(struct brw_context *brw,
|
||||
struct intel_mipmap_tree *mt,
|
||||
struct intel_miptree_map *map,
|
||||
unsigned int level, unsigned int slice)
|
||||
{
|
||||
intel_miptree_access_raw(brw, mt, level, slice,
|
||||
map->mode & GL_MAP_WRITE_BIT);
|
||||
|
||||
unsigned int x1, x2, y1, y2;
|
||||
tile_extents(mt, map, level, slice, &x1, &x2, &y1, &y2);
|
||||
map->stride = ALIGN(_mesa_format_row_stride(mt->format, map->w), 16);
|
||||
|
||||
/* The tiling and detiling functions require that the linear buffer
|
||||
* has proper 16-byte alignment (that is, its `x0` is 16-byte
|
||||
* aligned). Here we over-allocate the linear buffer by enough
|
||||
* bytes to get the proper alignment.
|
||||
*/
|
||||
map->buffer = _mesa_align_malloc(map->stride * (y2 - y1) + (x1 & 0xf), 16);
|
||||
map->ptr = (char *)map->buffer + (x1 & 0xf);
|
||||
assert(map->buffer);
|
||||
|
||||
if (!(map->mode & GL_MAP_INVALIDATE_RANGE_BIT)) {
|
||||
char *src = intel_miptree_map_raw(brw, mt, map->mode | MAP_RAW);
|
||||
src += mt->offset;
|
||||
|
||||
const mem_copy_fn fn =
|
||||
#if defined(USE_SSE41)
|
||||
cpu_has_sse4_1 ? (mem_copy_fn)_mesa_streaming_load_memcpy :
|
||||
#endif
|
||||
memcpy;
|
||||
|
||||
tiled_to_linear(x1, x2, y1, y2, map->ptr, src, map->stride,
|
||||
mt->surf.row_pitch, brw->has_swizzling, mt->surf.tiling,
|
||||
fn);
|
||||
|
||||
intel_miptree_unmap_raw(mt);
|
||||
}
|
||||
|
||||
map->unmap = intel_miptree_unmap_tiled_memcpy;
|
||||
}
|
||||
|
||||
static void
|
||||
intel_miptree_map_blit(struct brw_context *brw,
|
||||
struct intel_mipmap_tree *mt,
|
||||
|
|
@ -3732,7 +3643,6 @@ intel_miptree_map(struct brw_context *brw,
|
|||
void **out_ptr,
|
||||
ptrdiff_t *out_stride)
|
||||
{
|
||||
const struct gen_device_info *devinfo = &brw->screen->devinfo;
|
||||
struct intel_miptree_map *map;
|
||||
|
||||
assert(mt->surf.samples == 1);
|
||||
|
|
@ -3753,8 +3663,6 @@ intel_miptree_map(struct brw_context *brw,
|
|||
intel_miptree_map_depthstencil(brw, mt, map, level, slice);
|
||||
} else if (use_intel_mipree_map_blit(brw, mt, mode, level, slice)) {
|
||||
intel_miptree_map_blit(brw, mt, map, level, slice);
|
||||
} else if (mt->surf.tiling != ISL_TILING_LINEAR && devinfo->gen > 4) {
|
||||
intel_miptree_map_tiled_memcpy(brw, mt, map, level, slice);
|
||||
#if defined(USE_SSE41)
|
||||
} else if (!(mode & GL_MAP_WRITE_BIT) &&
|
||||
!mt->compressed && cpu_has_sse4_1 &&
|
||||
|
|
@ -3762,9 +3670,7 @@ intel_miptree_map(struct brw_context *brw,
|
|||
intel_miptree_map_movntdqa(brw, mt, map, level, slice);
|
||||
#endif
|
||||
} else {
|
||||
if (mt->surf.tiling != ISL_TILING_LINEAR)
|
||||
perf_debug("intel_miptree_map: mapping via gtt");
|
||||
intel_miptree_map_map(brw, mt, map, level, slice);
|
||||
intel_miptree_map_gtt(brw, mt, map, level, slice);
|
||||
}
|
||||
|
||||
*out_ptr = map->ptr;
|
||||
|
|
|
|||
|
|
@ -36,10 +36,6 @@
|
|||
#include "brw_context.h"
|
||||
#include "intel_tiled_memcpy.h"
|
||||
|
||||
#if defined(USE_SSE41)
|
||||
#include "main/streaming-load-memcpy.h"
|
||||
#include <smmintrin.h>
|
||||
#endif
|
||||
#if defined(__SSSE3__)
|
||||
#include <tmmintrin.h>
|
||||
#elif defined(__SSE2__)
|
||||
|
|
@ -217,31 +213,6 @@ rgba8_copy_aligned_src(void *dst, const void *src, size_t bytes)
|
|||
return dst;
|
||||
}
|
||||
|
||||
#if defined(USE_SSE41)
|
||||
static ALWAYS_INLINE void *
|
||||
_memcpy_streaming_load(void *dest, const void *src, size_t count)
|
||||
{
|
||||
if (count == 16) {
|
||||
__m128i val = _mm_stream_load_si128((__m128i *)src);
|
||||
_mm_storeu_si128((__m128i *)dest, val);
|
||||
return dest;
|
||||
} else if (count == 64) {
|
||||
__m128i val0 = _mm_stream_load_si128(((__m128i *)src) + 0);
|
||||
__m128i val1 = _mm_stream_load_si128(((__m128i *)src) + 1);
|
||||
__m128i val2 = _mm_stream_load_si128(((__m128i *)src) + 2);
|
||||
__m128i val3 = _mm_stream_load_si128(((__m128i *)src) + 3);
|
||||
_mm_storeu_si128(((__m128i *)dest) + 0, val0);
|
||||
_mm_storeu_si128(((__m128i *)dest) + 1, val1);
|
||||
_mm_storeu_si128(((__m128i *)dest) + 2, val2);
|
||||
_mm_storeu_si128(((__m128i *)dest) + 3, val3);
|
||||
return dest;
|
||||
} else {
|
||||
assert(count < 64); /* and (count < 16) for ytiled */
|
||||
return memcpy(dest, src, count);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
/**
|
||||
* Each row from y0 to y1 is copied in three parts: [x0,x1), [x1,x2), [x2,x3).
|
||||
* These ranges are in bytes, i.e. pixels * bytes-per-pixel.
|
||||
|
|
@ -706,12 +677,6 @@ xtiled_to_linear_faster(uint32_t x0, uint32_t x1, uint32_t x2, uint32_t x3,
|
|||
return xtiled_to_linear(0, 0, xtile_width, xtile_width, 0, xtile_height,
|
||||
dst, src, dst_pitch, swizzle_bit,
|
||||
rgba8_copy, rgba8_copy_aligned_src);
|
||||
#if defined(USE_SSE41)
|
||||
else if (mem_copy == (mem_copy_fn)_mesa_streaming_load_memcpy)
|
||||
return xtiled_to_linear(0, 0, xtile_width, xtile_width, 0, xtile_height,
|
||||
dst, src, dst_pitch, swizzle_bit,
|
||||
memcpy, _memcpy_streaming_load);
|
||||
#endif
|
||||
else
|
||||
unreachable("not reached");
|
||||
} else {
|
||||
|
|
@ -722,12 +687,6 @@ xtiled_to_linear_faster(uint32_t x0, uint32_t x1, uint32_t x2, uint32_t x3,
|
|||
return xtiled_to_linear(x0, x1, x2, x3, y0, y1,
|
||||
dst, src, dst_pitch, swizzle_bit,
|
||||
rgba8_copy, rgba8_copy_aligned_src);
|
||||
#if defined(USE_SSE41)
|
||||
else if (mem_copy == (mem_copy_fn)_mesa_streaming_load_memcpy)
|
||||
return xtiled_to_linear(x0, x1, x2, x3, y0, y1,
|
||||
dst, src, dst_pitch, swizzle_bit,
|
||||
memcpy, _memcpy_streaming_load);
|
||||
#endif
|
||||
else
|
||||
unreachable("not reached");
|
||||
}
|
||||
|
|
@ -760,12 +719,6 @@ ytiled_to_linear_faster(uint32_t x0, uint32_t x1, uint32_t x2, uint32_t x3,
|
|||
return ytiled_to_linear(0, 0, ytile_width, ytile_width, 0, ytile_height,
|
||||
dst, src, dst_pitch, swizzle_bit,
|
||||
rgba8_copy, rgba8_copy_aligned_src);
|
||||
#if defined(USE_SSE41)
|
||||
else if (mem_copy == (mem_copy_fn)_mesa_streaming_load_memcpy)
|
||||
return ytiled_to_linear(0, 0, ytile_width, ytile_width, 0, ytile_height,
|
||||
dst, src, dst_pitch, swizzle_bit,
|
||||
memcpy, _memcpy_streaming_load);
|
||||
#endif
|
||||
else
|
||||
unreachable("not reached");
|
||||
} else {
|
||||
|
|
@ -776,12 +729,6 @@ ytiled_to_linear_faster(uint32_t x0, uint32_t x1, uint32_t x2, uint32_t x3,
|
|||
return ytiled_to_linear(x0, x1, x2, x3, y0, y1,
|
||||
dst, src, dst_pitch, swizzle_bit,
|
||||
rgba8_copy, rgba8_copy_aligned_src);
|
||||
#if defined(USE_SSE41)
|
||||
else if (mem_copy == (mem_copy_fn)_mesa_streaming_load_memcpy)
|
||||
return ytiled_to_linear(x0, x1, x2, x3, y0, y1,
|
||||
dst, src, dst_pitch, swizzle_bit,
|
||||
memcpy, _memcpy_streaming_load);
|
||||
#endif
|
||||
else
|
||||
unreachable("not reached");
|
||||
}
|
||||
|
|
@ -921,15 +868,6 @@ tiled_to_linear(uint32_t xt1, uint32_t xt2,
|
|||
unreachable("unsupported tiling");
|
||||
}
|
||||
|
||||
#if defined(USE_SSE41)
|
||||
if (mem_copy == (mem_copy_fn)_mesa_streaming_load_memcpy) {
|
||||
/* The hidden cacheline sized register used by movntdqa can apparently
|
||||
* give you stale data, so do an mfence to invalidate it.
|
||||
*/
|
||||
_mm_mfence();
|
||||
}
|
||||
#endif
|
||||
|
||||
/* Round out to tile boundaries. */
|
||||
xt0 = ALIGN_DOWN(xt1, tw);
|
||||
xt3 = ALIGN_UP (xt2, tw);
|
||||
|
|
|
|||
|
|
@ -129,13 +129,10 @@ files_i965 = files(
|
|||
'intel_tex_image.c',
|
||||
'intel_tex_obj.h',
|
||||
'intel_tex_validate.c',
|
||||
'intel_upload.c',
|
||||
'libdrm_macros.h',
|
||||
)
|
||||
|
||||
files_intel_tiled_memcpy = files(
|
||||
'intel_tiled_memcpy.c',
|
||||
'intel_tiled_memcpy.h',
|
||||
'intel_upload.c',
|
||||
'libdrm_macros.h',
|
||||
)
|
||||
|
||||
i965_gen_libs = []
|
||||
|
|
@ -179,15 +176,6 @@ i965_oa_sources = custom_target(
|
|||
],
|
||||
)
|
||||
|
||||
intel_tiled_memcpy = static_library(
|
||||
'intel_tiled_memcpy',
|
||||
[files_intel_tiled_memcpy],
|
||||
include_directories : [
|
||||
inc_common, inc_intel, inc_dri_common, inc_drm_uapi,
|
||||
],
|
||||
c_args : [c_vis_args, no_override_init_args, '-msse2', sse41_args],
|
||||
)
|
||||
|
||||
libi965 = static_library(
|
||||
'i965',
|
||||
[files_i965, i965_oa_sources, ir_expression_operation_h,
|
||||
|
|
@ -199,7 +187,7 @@ libi965 = static_library(
|
|||
cpp_args : [cpp_vis_args, '-msse2'],
|
||||
link_with : [
|
||||
i965_gen_libs, libintel_common, libintel_dev, libisl, libintel_compiler,
|
||||
libblorp, intel_tiled_memcpy,
|
||||
libblorp,
|
||||
],
|
||||
dependencies : [dep_libdrm, dep_valgrind, idep_nir_headers],
|
||||
)
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue