mirror of https://gitlab.freedesktop.org/mesa/mesa.git
radeonsi: remove the old si_compute_copy_image
It's replaced by the compute blit.

Reviewed-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/28917>
parent b0c0cca3a7
commit 40bcb588dd
4 changed files with 0 additions and 292 deletions
@@ -11,36 +11,6 @@
 #include "util/hash_table.h"
-#include "util/u_pack_color.h"
-
-static bool si_can_use_compute_blit(struct si_context *sctx, enum pipe_format format,
-                                    unsigned num_samples, bool is_store, bool has_dcc)
-{
-   /* TODO: This format fails AMD_TEST=imagecopy. */
-   if (format == PIPE_FORMAT_A8R8_UNORM && is_store)
-      return false;
-
-   /* MSAA image stores are broken. AMD_DEBUG=nofmask fixes them, implying that the FMASK
-    * expand pass doesn't work, but let's use the gfx blit, which should be faster because
-    * it doesn't require expanding the FMASK.
-    *
-    * TODO: Broken MSAA stores can cause app issues, though this issue might only affect
-    * internal blits, not sure.
-    *
-    * EQAA image stores are also unimplemented, which should be rejected here after MSAA
-    * image stores are fixed.
-    */
-   if (num_samples > 1 && is_store)
-      return false;
-
-   if (util_format_is_depth_or_stencil(format))
-      return false;
-
-   /* Image stores support DCC since GFX10. */
-   if (has_dcc && is_store && sctx->gfx_level < GFX10)
-      return false;
-
-   return true;
-}
-
 /* Determine the cache policy. */
 static enum si_cache_policy get_cache_policy(struct si_context *sctx, enum si_coherency coher,
                                              uint64_t size)
@@ -553,188 +523,6 @@ static void si_launch_grid_internal_images(struct si_context *sctx,
       pipe_resource_reference(&saved_image[i].resource, NULL);
 }
 
-bool si_compute_copy_image_old(struct si_context *sctx, struct pipe_resource *dst, unsigned dst_level,
-                               struct pipe_resource *src, unsigned src_level, unsigned dstx,
-                               unsigned dsty, unsigned dstz, const struct pipe_box *src_box,
-                               unsigned flags)
-{
-   struct si_texture *ssrc = (struct si_texture*)src;
-   struct si_texture *sdst = (struct si_texture*)dst;
-
-   /* The compute copy is mandatory for compressed and subsampled formats because the gfx copy
-    * doesn't support them. In all other cases, call si_can_use_compute_blit.
-    *
-    * The format is identical (we only need to check the src format) except compressed formats,
-    * which can be paired with an equivalent integer format.
-    */
-   if (!util_format_is_compressed(src->format) &&
-       !util_format_is_compressed(dst->format) &&
-       !util_format_is_subsampled_422(src->format)) {
-      bool src_can_use_compute_blit =
-         si_can_use_compute_blit(sctx, src->format, src->nr_samples, false,
-                                 vi_dcc_enabled(ssrc, src_level));
-
-      if (!src_can_use_compute_blit)
-         return false;
-
-      bool dst_can_use_compute_blit =
-         si_can_use_compute_blit(sctx, dst->format, dst->nr_samples, true,
-                                 vi_dcc_enabled(sdst, dst_level));
-
-      if (!dst_can_use_compute_blit && !sctx->has_graphics &&
-          si_can_use_compute_blit(sctx, dst->format, dst->nr_samples, false,
-                                  vi_dcc_enabled(sdst, dst_level))) {
-         /* Non-graphics contexts don't have a blitter, so try harder to do
-          * a compute blit by disabling DCC on the destination texture.
-          */
-         dst_can_use_compute_blit = si_texture_disable_dcc(sctx, sdst);
-      }
-
-      if (!dst_can_use_compute_blit)
-         return false;
-   }
-
-   enum pipe_format src_format = util_format_linear(src->format);
-   enum pipe_format dst_format = util_format_linear(dst->format);
-   bool is_linear = ssrc->surface.is_linear || sdst->surface.is_linear;
-
-   assert(util_format_is_subsampled_422(src_format) == util_format_is_subsampled_422(dst_format));
-
-   /* Interpret as integer values to avoid NaN issues */
-   if (!vi_dcc_enabled(ssrc, src_level) &&
-       !vi_dcc_enabled(sdst, dst_level) &&
-       src_format == dst_format &&
-       util_format_is_float(src_format) &&
-       !util_format_is_compressed(src_format)) {
-      switch (util_format_get_blocksizebits(src_format)) {
-      case 16:
-         src_format = dst_format = PIPE_FORMAT_R16_UINT;
-         break;
-      case 32:
-         src_format = dst_format = PIPE_FORMAT_R32_UINT;
-         break;
-      case 64:
-         src_format = dst_format = PIPE_FORMAT_R32G32_UINT;
-         break;
-      case 128:
-         src_format = dst_format = PIPE_FORMAT_R32G32B32A32_UINT;
-         break;
-      default:
-         assert(false);
-      }
-   }
-
-   /* Interpret compressed formats as UINT. */
-   struct pipe_box new_box;
-   unsigned src_access = 0, dst_access = 0;
-
-   /* Note that staging copies do compressed<->UINT, so one of the formats is already UINT. */
-   if (util_format_is_compressed(src_format) || util_format_is_compressed(dst_format)) {
-      if (util_format_is_compressed(src_format))
-         src_access |= SI_IMAGE_ACCESS_BLOCK_FORMAT_AS_UINT;
-      if (util_format_is_compressed(dst_format))
-         dst_access |= SI_IMAGE_ACCESS_BLOCK_FORMAT_AS_UINT;
-
-      dstx = util_format_get_nblocksx(dst_format, dstx);
-      dsty = util_format_get_nblocksy(dst_format, dsty);
-
-      new_box.x = util_format_get_nblocksx(src_format, src_box->x);
-      new_box.y = util_format_get_nblocksy(src_format, src_box->y);
-      new_box.z = src_box->z;
-      new_box.width = util_format_get_nblocksx(src_format, src_box->width);
-      new_box.height = util_format_get_nblocksy(src_format, src_box->height);
-      new_box.depth = src_box->depth;
-      src_box = &new_box;
-
-      if (ssrc->surface.bpe == 8)
-         src_format = dst_format = PIPE_FORMAT_R16G16B16A16_UINT; /* 64-bit block */
-      else
-         src_format = dst_format = PIPE_FORMAT_R32G32B32A32_UINT; /* 128-bit block */
-   }
-
-   if (util_format_is_subsampled_422(src_format)) {
-      assert(src_format == dst_format);
-
-      src_access |= SI_IMAGE_ACCESS_BLOCK_FORMAT_AS_UINT;
-      dst_access |= SI_IMAGE_ACCESS_BLOCK_FORMAT_AS_UINT;
-
-      dstx = util_format_get_nblocksx(src_format, dstx);
-
-      src_format = dst_format = PIPE_FORMAT_R32_UINT;
-
-      /* Interpreting a 422 subsampled format (16 bpp) as 32 bpp
-       * should force us to divide src_box->x, dstx and width by 2.
-       * But given that ac_surface allocates this format as 32 bpp,
-       * and that surf_size is then modified to pack the values,
-       * we must keep the original values to get the correct results.
-       */
-   }
-
-   /* SNORM blitting has precision issues. Use the SINT equivalent instead, which doesn't
-    * force DCC decompression.
-    */
-   if (util_format_is_snorm(dst_format))
-      src_format = dst_format = util_format_snorm_to_sint(dst_format);
-
-   if (src_box->width == 0 || src_box->height == 0 || src_box->depth == 0)
-      return true; /* success - nothing to do */
-
-   struct pipe_image_view image[2] = {0};
-   image[0].resource = src;
-   image[0].shader_access = image[0].access = PIPE_IMAGE_ACCESS_READ | src_access;
-   image[0].format = src_format;
-   image[0].u.tex.level = src_level;
-   image[0].u.tex.first_layer = 0;
-   image[0].u.tex.last_layer = util_max_layer(src, src_level);
-   image[1].resource = dst;
-   image[1].shader_access = image[1].access = PIPE_IMAGE_ACCESS_WRITE | dst_access;
-   image[1].format = dst_format;
-   image[1].u.tex.level = dst_level;
-   image[1].u.tex.first_layer = 0;
-   image[1].u.tex.last_layer = util_max_layer(dst, dst_level);
-
-   struct pipe_grid_info info = {0};
-
-   bool dst_is_1d = dst->target == PIPE_TEXTURE_1D ||
-                    dst->target == PIPE_TEXTURE_1D_ARRAY;
-   bool src_is_1d = src->target == PIPE_TEXTURE_1D ||
-                    src->target == PIPE_TEXTURE_1D_ARRAY;
-   int block_x, block_y;
-   int block_z = 1;
-
-   /* Choose the block dimensions based on the copy area size. */
-   if (src_box->height <= 4) {
-      block_y = util_next_power_of_two(src_box->height);
-      block_x = 64 / block_y;
-   } else if (src_box->width <= 4) {
-      block_x = util_next_power_of_two(src_box->width);
-      block_y = 64 / block_x;
-   } else if (is_linear) {
-      block_x = 64;
-      block_y = 1;
-   } else {
-      block_x = 8;
-      block_y = 8;
-   }
-
-   sctx->cs_user_data[0] = src_box->x | (dstx << 16);
-   sctx->cs_user_data[1] = src_box->y | (dsty << 16);
-   sctx->cs_user_data[2] = src_box->z | (dstz << 16);
-
-   unsigned wg_dim =
-      set_work_size(&info, block_x, block_y, block_z,
-                    src_box->width, src_box->height, src_box->depth);
-
-   void **copy_image_cs_ptr = &sctx->cs_copy_image[wg_dim - 1][src_is_1d][dst_is_1d];
-   if (!*copy_image_cs_ptr)
-      *copy_image_cs_ptr = si_create_copy_image_cs(sctx, wg_dim, src_is_1d, dst_is_1d);
-
-   assert(*copy_image_cs_ptr);
-
-   si_launch_grid_internal_images(sctx, image, 2, &info, *copy_image_cs_ptr, flags);
-   return true;
-}
-
 void si_retile_dcc(struct si_context *sctx, struct si_texture *tex)
 {
    assert(sctx->gfx_level < GFX12);
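The block-dimension heuristic in the removed function always shapes a fixed 64-thread workgroup to the copy area. Below is a minimal standalone sketch of that heuristic with a couple of worked cases; the helper names are mine, not Mesa API (next_pow2 stands in for util_next_power_of_two):

/* Standalone sketch of the 64-thread block-shape heuristic from the
 * removed si_compute_copy_image_old. Helper names are illustrative. */
#include <assert.h>
#include <stdbool.h>

/* Round up to the next power of two (for the small values used here). */
static unsigned next_pow2(unsigned x)
{
   unsigned p = 1;
   while (p < x)
      p *= 2;
   return p;
}

static void choose_block(unsigned width, unsigned height, bool is_linear,
                         unsigned *block_x, unsigned *block_y)
{
   if (height <= 4) {            /* short copy area: make the block wide */
      *block_y = next_pow2(height);
      *block_x = 64 / *block_y;
   } else if (width <= 4) {      /* narrow copy area: make the block tall */
      *block_x = next_pow2(width);
      *block_y = 64 / *block_x;
   } else if (is_linear) {       /* linear layout: one full row per block */
      *block_x = 64;
      *block_y = 1;
   } else {                      /* tiled 2D: square 8x8 block */
      *block_x = 8;
      *block_y = 8;
   }
   assert(*block_x * *block_y == 64); /* always 64 threads total */
}

int main(void)
{
   unsigned bx, by;
   choose_block(256, 3, false, &bx, &by);   /* height 3 rounds up to 4 -> 16x4 */
   assert(bx == 16 && by == 4);
   choose_block(256, 256, true, &bx, &by);  /* linear surface -> 64x1 */
   assert(bx == 64 && by == 1);
   return 0;
}

The 64x1 shape for linear surfaces presumably keeps consecutive threads on consecutive texels in memory, while 8x8 matches the 2D locality of tiled layouts.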
@@ -272,14 +272,6 @@ static void si_destroy_context(struct pipe_context *context)
       sctx->b.delete_compute_state(&sctx->b, sctx->cs_copy_buffer);
    if (sctx->cs_ubyte_to_ushort)
       sctx->b.delete_compute_state(&sctx->b, sctx->cs_ubyte_to_ushort);
-   for (unsigned i = 0; i < ARRAY_SIZE(sctx->cs_copy_image); i++) {
-      for (unsigned j = 0; j < ARRAY_SIZE(sctx->cs_copy_image[i]); j++) {
-         for (unsigned k = 0; k < ARRAY_SIZE(sctx->cs_copy_image[i][j]); k++) {
-            if (sctx->cs_copy_image[i][j][k])
-               sctx->b.delete_compute_state(&sctx->b, sctx->cs_copy_image[i][j][k]);
-         }
-      }
-   }
    if (sctx->cs_clear_12bytes_buffer)
       sctx->b.delete_compute_state(&sctx->b, sctx->cs_clear_12bytes_buffer);
    for (unsigned i = 0; i < ARRAY_SIZE(sctx->cs_dcc_retile); i++) {
@@ -990,7 +990,6 @@ struct si_context {
    void *cs_clear_buffer_rmw;
    void *cs_copy_buffer;
    void *cs_ubyte_to_ushort;
-   void *cs_copy_image[3][2][2]; /* [wg_dim-1][src_is_1d][dst_is_1d] */
    void *cs_clear_12bytes_buffer;
    void *cs_dcc_retile[32];
    void *cs_fmask_expand[3][2]; /* [log2(samples)-1][is_array] */
@@ -1502,10 +1501,6 @@ void si_copy_buffer(struct si_context *sctx, struct pipe_resource *dst, struct p
                     uint64_t dst_offset, uint64_t src_offset, unsigned size, unsigned flags);
 void si_compute_shorten_ubyte_buffer(struct si_context *sctx, struct pipe_resource *dst, struct pipe_resource *src,
                                      uint64_t dst_offset, uint64_t src_offset, unsigned size, unsigned flags);
-bool si_compute_copy_image_old(struct si_context *sctx, struct pipe_resource *dst, unsigned dst_level,
-                               struct pipe_resource *src, unsigned src_level, unsigned dstx,
-                               unsigned dsty, unsigned dstz, const struct pipe_box *src_box,
-                               unsigned flags);
 void si_compute_clear_image_dcc_single(struct si_context *sctx, struct si_texture *tex,
                                        unsigned level, enum pipe_format format,
                                        const union pipe_color_union *color, unsigned flags);
@@ -1632,8 +1627,6 @@ void si_suspend_queries(struct si_context *sctx);
 void si_resume_queries(struct si_context *sctx);
 
 /* si_shaderlib_nir.c */
-void *si_create_copy_image_cs(struct si_context *sctx, unsigned wg_dim,
-                              bool src_is_1d_array, bool dst_is_1d_array);
 void *si_create_dcc_retile_cs(struct si_context *sctx, struct radeon_surf *surf);
 void *gfx9_create_clear_dcc_msaa_cs(struct si_context *sctx, struct si_texture *tex);
 void *si_create_passthrough_tcs(struct si_context *sctx);
@@ -48,71 +48,6 @@ deref_ssa(nir_builder *b, nir_variable *var)
    return &nir_build_deref_var(b, var)->def;
 }
 
-/* Create a NIR compute shader implementing copy_image.
- *
- * This shader can handle 1D and 2D, linear and non-linear images.
- * It expects the source and destination (x,y,z) coords as user_data_amd,
- * packed into 3 SGPRs as 2x16 bits per component.
- */
-void *si_create_copy_image_cs(struct si_context *sctx, unsigned wg_dim,
-                              bool src_is_1d_array, bool dst_is_1d_array)
-{
-   const nir_shader_compiler_options *options =
-      sctx->b.screen->get_compiler_options(sctx->b.screen, PIPE_SHADER_IR_NIR, PIPE_SHADER_COMPUTE);
-
-   nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_COMPUTE, options, "copy_image_cs");
-   b.shader->info.num_images = 2;
-
-   /* The workgroup size is either 8x8 for normal (non-linear) 2D images,
-    * or 64x1 for 1D and linear-2D images.
-    */
-   b.shader->info.workgroup_size_variable = true;
-
-   b.shader->info.cs.user_data_components_amd = 3;
-   nir_def *ids = nir_pad_vector_imm_int(&b, get_global_ids(&b, wg_dim), 0, 3);
-
-   nir_def *coord_src = NULL, *coord_dst = NULL;
-   unpack_2x16(&b, nir_trim_vector(&b, nir_load_user_data_amd(&b), 3),
-               &coord_src, &coord_dst);
-
-   coord_src = nir_iadd(&b, coord_src, ids);
-   coord_dst = nir_iadd(&b, coord_dst, ids);
-
-   /* Coordinates must have 4 channels in NIR. */
-   coord_src = nir_pad_vector(&b, coord_src, 4);
-   coord_dst = nir_pad_vector(&b, coord_dst, 4);
-
-   static unsigned swizzle_xz[] = {0, 2, 0, 0};
-
-   if (src_is_1d_array)
-      coord_src = nir_swizzle(&b, coord_src, swizzle_xz, 4);
-   if (dst_is_1d_array)
-      coord_dst = nir_swizzle(&b, coord_dst, swizzle_xz, 4);
-
-   const struct glsl_type *src_img_type = glsl_image_type(src_is_1d_array ? GLSL_SAMPLER_DIM_1D
-                                                                          : GLSL_SAMPLER_DIM_2D,
-                                                          /*is_array*/ true, GLSL_TYPE_FLOAT);
-   const struct glsl_type *dst_img_type = glsl_image_type(dst_is_1d_array ? GLSL_SAMPLER_DIM_1D
-                                                                          : GLSL_SAMPLER_DIM_2D,
-                                                          /*is_array*/ true, GLSL_TYPE_FLOAT);
-
-   nir_variable *img_src = nir_variable_create(b.shader, nir_var_image, src_img_type, "img_src");
-   img_src->data.binding = 0;
-
-   nir_variable *img_dst = nir_variable_create(b.shader, nir_var_image, dst_img_type, "img_dst");
-   img_dst->data.binding = 1;
-
-   nir_def *undef32 = nir_undef(&b, 1, 32);
-   nir_def *zero = nir_imm_int(&b, 0);
-
-   nir_def *data = nir_image_deref_load(&b, /*num_components*/ 4, /*bit_size*/ 32,
-                                        deref_ssa(&b, img_src), coord_src, undef32, zero);
-
-   nir_image_deref_store(&b, deref_ssa(&b, img_dst), coord_dst, undef32, data, zero);
-
-   return create_shader_state(sctx, b.shader);
-}
-
 void *si_create_dcc_retile_cs(struct si_context *sctx, struct radeon_surf *surf)
 {
    nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_COMPUTE, sctx->screen->nir_options,
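The removed host code and shader agree on a small packing convention: each of the three cs_user_data words holds a source coordinate in the low 16 bits and a destination coordinate in the high 16 bits, which unpack_2x16 splits apart on the shader side. Here is a hedged round-trip sketch of that convention in plain C; pack_2x16 and unpack_coords are illustrative names, not Mesa functions:

/* Illustrative round trip for the 2x16 user-data packing used by the
 * removed path: the host packed (src | (dst << 16)); the shader then
 * unpacked the low and high halves. */
#include <assert.h>
#include <stdint.h>

static uint32_t pack_2x16(uint16_t src, uint16_t dst)
{
   return (uint32_t)src | ((uint32_t)dst << 16);
}

static void unpack_coords(uint32_t packed, uint16_t *src, uint16_t *dst)
{
   *src = packed & 0xffff; /* low half: source coordinate */
   *dst = packed >> 16;    /* high half: destination coordinate */
}

int main(void)
{
   /* Mirrors sctx->cs_user_data[0] = src_box->x | (dstx << 16); */
   uint16_t sx, dx;
   unpack_coords(pack_2x16(123, 456), &sx, &dx);
   assert(sx == 123 && dx == 456);
   return 0;
}

The 16-bit fields cap each packed coordinate at 65535, which is presumably ample for the image sizes this path handled.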