radeonsi: remove the old si_compute_copy_image

It's replaced by the compute blit.

Reviewed-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/28917>
This commit is contained in:
Marek Olšák 2024-03-25 20:57:05 -04:00 committed by Marge Bot
parent b0c0cca3a7
commit 40bcb588dd
4 changed files with 0 additions and 292 deletions

View file

@@ -11,36 +11,6 @@
#include "util/hash_table.h"
#include "util/u_pack_color.h"
/* Return whether a compute shader can be used to blit/copy images of the
 * given format and sample count. "is_store" distinguishes the destination
 * side (image store) from the source side (image load); "has_dcc" tells
 * whether the texture has DCC enabled at the level being accessed.
 */
static bool si_can_use_compute_blit(struct si_context *sctx, enum pipe_format format,
unsigned num_samples, bool is_store, bool has_dcc)
{
/* Depth/stencil is never handled by the compute blit. */
if (util_format_is_depth_or_stencil(format))
return false;

if (is_store) {
/* TODO: This format fails AMD_TEST=imagecopy. */
if (format == PIPE_FORMAT_A8R8_UNORM)
return false;

/* MSAA image stores are broken. AMD_DEBUG=nofmask fixes them, implying that the FMASK
 * expand pass doesn't work, but let's use the gfx blit, which should be faster because
 * it doesn't require expanding the FMASK.
 *
 * TODO: Broken MSAA stores can cause app issues, though this issue might only affect
 * internal blits, not sure.
 *
 * EQAA image stores are also unimplemented, which should be rejected here after MSAA
 * image stores are fixed.
 */
if (num_samples > 1)
return false;

/* Image stores support DCC only since GFX10. */
if (has_dcc && sctx->gfx_level < GFX10)
return false;
}

return true;
}
/* Determine the cache policy. */
static enum si_cache_policy get_cache_policy(struct si_context *sctx, enum si_coherency coher,
uint64_t size)
@@ -553,188 +523,6 @@ static void si_launch_grid_internal_images(struct si_context *sctx,
pipe_resource_reference(&saved_image[i].resource, NULL);
}
/* Copy a subregion of "src" (mip "src_level", region "src_box") into "dst"
 * at (dstx, dsty, dstz) of mip "dst_level" using a compute shader instead of
 * the gfx blitter.
 *
 * Returns true on success (including the trivial empty-box case) and false
 * when the copy can't be performed on compute, in which case the caller must
 * fall back to another copy path.
 *
 * NOTE(review): "flags" is forwarded unmodified to
 * si_launch_grid_internal_images — presumably cache/sync flags; confirm at
 * the callers.
 */
bool si_compute_copy_image_old(struct si_context *sctx, struct pipe_resource *dst, unsigned dst_level,
struct pipe_resource *src, unsigned src_level, unsigned dstx,
unsigned dsty, unsigned dstz, const struct pipe_box *src_box,
unsigned flags)
{
struct si_texture *ssrc = (struct si_texture*)src;
struct si_texture *sdst = (struct si_texture*)dst;
/* The compute copy is mandatory for compressed and subsampled formats because the gfx copy
 * doesn't support them. In all other cases, call si_can_use_compute_blit.
 *
 * The format is identical (we only need to check the src format) except compressed formats,
 * which can be paired with an equivalent integer format.
 */
if (!util_format_is_compressed(src->format) &&
!util_format_is_compressed(dst->format) &&
!util_format_is_subsampled_422(src->format)) {
bool src_can_use_compute_blit =
si_can_use_compute_blit(sctx, src->format, src->nr_samples, false,
vi_dcc_enabled(ssrc, src_level));
if (!src_can_use_compute_blit)
return false;
bool dst_can_use_compute_blit =
si_can_use_compute_blit(sctx, dst->format, dst->nr_samples, true,
vi_dcc_enabled(sdst, dst_level));
/* The store side failed, but the same format would be loadable without DCC. */
if (!dst_can_use_compute_blit && !sctx->has_graphics &&
si_can_use_compute_blit(sctx, dst->format, dst->nr_samples, false,
vi_dcc_enabled(sdst, dst_level))) {
/* Non-graphics context don't have a blitter, so try harder to do
 * a compute blit by disabling dcc on the destination texture.
 */
dst_can_use_compute_blit = si_texture_disable_dcc(sctx, sdst);
}
if (!dst_can_use_compute_blit)
return false;
}
/* util_format_linear presumably maps sRGB formats to their linear
 * counterparts so the copy moves raw bits — confirm against u_format docs.
 */
enum pipe_format src_format = util_format_linear(src->format);
enum pipe_format dst_format = util_format_linear(dst->format);
bool is_linear = ssrc->surface.is_linear || sdst->surface.is_linear;
assert(util_format_is_subsampled_422(src_format) == util_format_is_subsampled_422(dst_format));
/* Interpret as integer values to avoid NaN issues */
if (!vi_dcc_enabled(ssrc, src_level) &&
!vi_dcc_enabled(sdst, dst_level) &&
src_format == dst_format &&
util_format_is_float(src_format) &&
!util_format_is_compressed(src_format)) {
/* Pick a UINT format of the same block size as the float format. */
switch(util_format_get_blocksizebits(src_format)) {
case 16:
src_format = dst_format = PIPE_FORMAT_R16_UINT;
break;
case 32:
src_format = dst_format = PIPE_FORMAT_R32_UINT;
break;
case 64:
src_format = dst_format = PIPE_FORMAT_R32G32_UINT;
break;
case 128:
src_format = dst_format = PIPE_FORMAT_R32G32B32A32_UINT;
break;
default:
assert(false);
}
}
/* Interpret compressed formats as UINT. */
struct pipe_box new_box;
unsigned src_access = 0, dst_access = 0;
/* Note that staging copies do compressed<->UINT, so one of the formats is already UINT. */
if (util_format_is_compressed(src_format) || util_format_is_compressed(dst_format)) {
if (util_format_is_compressed(src_format))
src_access |= SI_IMAGE_ACCESS_BLOCK_FORMAT_AS_UINT;
if (util_format_is_compressed(dst_format))
dst_access |= SI_IMAGE_ACCESS_BLOCK_FORMAT_AS_UINT;
/* Rescale coordinates and sizes from texels to compressed blocks. */
dstx = util_format_get_nblocksx(dst_format, dstx);
dsty = util_format_get_nblocksy(dst_format, dsty);
new_box.x = util_format_get_nblocksx(src_format, src_box->x);
new_box.y = util_format_get_nblocksy(src_format, src_box->y);
new_box.z = src_box->z;
new_box.width = util_format_get_nblocksx(src_format, src_box->width);
new_box.height = util_format_get_nblocksy(src_format, src_box->height);
new_box.depth = src_box->depth;
src_box = &new_box;
if (ssrc->surface.bpe == 8)
src_format = dst_format = PIPE_FORMAT_R16G16B16A16_UINT; /* 64-bit block */
else
src_format = dst_format = PIPE_FORMAT_R32G32B32A32_UINT; /* 128-bit block */
}
if (util_format_is_subsampled_422(src_format)) {
assert(src_format == dst_format);
src_access |= SI_IMAGE_ACCESS_BLOCK_FORMAT_AS_UINT;
dst_access |= SI_IMAGE_ACCESS_BLOCK_FORMAT_AS_UINT;
dstx = util_format_get_nblocksx(src_format, dstx);
src_format = dst_format = PIPE_FORMAT_R32_UINT;
/* Interpreting 422 subsampled format (16 bpp) as 32 bpp
 * should force us to divide src_box->x, dstx and width by 2.
 * But given that ac_surface allocates this format as 32 bpp
 * and that surf_size is then modified to pack the values
 * we must keep the original values to get the correct results.
 */
}
/* SNORM blitting has precision issues. Use the SINT equivalent instead, which doesn't
 * force DCC decompression.
 */
if (util_format_is_snorm(dst_format))
src_format = dst_format = util_format_snorm_to_sint(dst_format);
if (src_box->width == 0 || src_box->height == 0 || src_box->depth == 0)
return true; /* success - nothing to do */
/* Bind the source as image 0 (read) and the destination as image 1 (write);
 * this matches the bindings declared in si_create_copy_image_cs.
 */
struct pipe_image_view image[2] = {0};
image[0].resource = src;
image[0].shader_access = image[0].access = PIPE_IMAGE_ACCESS_READ | src_access;
image[0].format = src_format;
image[0].u.tex.level = src_level;
image[0].u.tex.first_layer = 0;
image[0].u.tex.last_layer = util_max_layer(src, src_level);
image[1].resource = dst;
image[1].shader_access = image[1].access = PIPE_IMAGE_ACCESS_WRITE | dst_access;
image[1].format = dst_format;
image[1].u.tex.level = dst_level;
image[1].u.tex.first_layer = 0;
image[1].u.tex.last_layer = util_max_layer(dst, dst_level);
struct pipe_grid_info info = {0};
bool dst_is_1d = dst->target == PIPE_TEXTURE_1D ||
dst->target == PIPE_TEXTURE_1D_ARRAY;
bool src_is_1d = src->target == PIPE_TEXTURE_1D ||
src->target == PIPE_TEXTURE_1D_ARRAY;
int block_x, block_y;
int block_z = 1;
/* Choose the block dimensions based on the copy area size. */
if (src_box->height <= 4) {
block_y = util_next_power_of_two(src_box->height);
block_x = 64 / block_y;
} else if (src_box->width <= 4) {
block_x = util_next_power_of_two(src_box->width);
block_y = 64 / block_x;
} else if (is_linear) {
block_x = 64;
block_y = 1;
} else {
block_x = 8;
block_y = 8;
}
/* Pack source and destination start coordinates as 2x16 bits per 32-bit
 * user SGPR; the shader unpacks them (see si_create_copy_image_cs).
 */
sctx->cs_user_data[0] = src_box->x | (dstx << 16);
sctx->cs_user_data[1] = src_box->y | (dsty << 16);
sctx->cs_user_data[2] = src_box->z | (dstz << 16);
/* wg_dim (1-3) selects the shader variant indexed as cs_copy_image[wg_dim-1]. */
unsigned wg_dim =
set_work_size(&info, block_x, block_y, block_z,
src_box->width, src_box->height, src_box->depth);
/* Lazily create and cache the shader variant for this dimensionality/layout. */
void **copy_image_cs_ptr = &sctx->cs_copy_image[wg_dim - 1][src_is_1d][dst_is_1d];
if (!*copy_image_cs_ptr)
*copy_image_cs_ptr = si_create_copy_image_cs(sctx, wg_dim, src_is_1d, dst_is_1d);
assert(*copy_image_cs_ptr);
si_launch_grid_internal_images(sctx, image, 2, &info, *copy_image_cs_ptr, flags);
return true;
}
void si_retile_dcc(struct si_context *sctx, struct si_texture *tex)
{
assert(sctx->gfx_level < GFX12);

View file

@@ -272,14 +272,6 @@ static void si_destroy_context(struct pipe_context *context)
sctx->b.delete_compute_state(&sctx->b, sctx->cs_copy_buffer);
if (sctx->cs_ubyte_to_ushort)
sctx->b.delete_compute_state(&sctx->b, sctx->cs_ubyte_to_ushort);
for (unsigned i = 0; i < ARRAY_SIZE(sctx->cs_copy_image); i++) {
for (unsigned j = 0; j < ARRAY_SIZE(sctx->cs_copy_image[i]); j++) {
for (unsigned k = 0; k < ARRAY_SIZE(sctx->cs_copy_image[i][j]); k++) {
if (sctx->cs_copy_image[i][j][k])
sctx->b.delete_compute_state(&sctx->b, sctx->cs_copy_image[i][j][k]);
}
}
}
if (sctx->cs_clear_12bytes_buffer)
sctx->b.delete_compute_state(&sctx->b, sctx->cs_clear_12bytes_buffer);
for (unsigned i = 0; i < ARRAY_SIZE(sctx->cs_dcc_retile); i++) {

View file

@@ -990,7 +990,6 @@ struct si_context {
void *cs_clear_buffer_rmw;
void *cs_copy_buffer;
void *cs_ubyte_to_ushort;
void *cs_copy_image[3][2][2]; /* [wg_dim-1][src_is_1d][dst_is_1d] */
void *cs_clear_12bytes_buffer;
void *cs_dcc_retile[32];
void *cs_fmask_expand[3][2]; /* [log2(samples)-1][is_array] */
@@ -1502,10 +1501,6 @@ void si_copy_buffer(struct si_context *sctx, struct pipe_resource *dst, struct p
uint64_t dst_offset, uint64_t src_offset, unsigned size, unsigned flags);
void si_compute_shorten_ubyte_buffer(struct si_context *sctx, struct pipe_resource *dst, struct pipe_resource *src,
uint64_t dst_offset, uint64_t src_offset, unsigned size, unsigned flags);
bool si_compute_copy_image_old(struct si_context *sctx, struct pipe_resource *dst, unsigned dst_level,
struct pipe_resource *src, unsigned src_level, unsigned dstx,
unsigned dsty, unsigned dstz, const struct pipe_box *src_box,
unsigned flags);
void si_compute_clear_image_dcc_single(struct si_context *sctx, struct si_texture *tex,
unsigned level, enum pipe_format format,
const union pipe_color_union *color, unsigned flags);
@@ -1632,8 +1627,6 @@ void si_suspend_queries(struct si_context *sctx);
void si_resume_queries(struct si_context *sctx);
/* si_shaderlib_nir.c */
void *si_create_copy_image_cs(struct si_context *sctx, unsigned wg_dim,
bool src_is_1d_array, bool dst_is_1d_array);
void *si_create_dcc_retile_cs(struct si_context *sctx, struct radeon_surf *surf);
void *gfx9_create_clear_dcc_msaa_cs(struct si_context *sctx, struct si_texture *tex);
void *si_create_passthrough_tcs(struct si_context *sctx);

View file

@@ -48,71 +48,6 @@ deref_ssa(nir_builder *b, nir_variable *var)
return &nir_build_deref_var(b, var)->def;
}
/* Create a NIR compute shader implementing copy_image.
 *
 * This shader can handle 1D and 2D, linear and non-linear images.
 * It expects the source and destination (x,y,z) coords as user_data_amd,
 * packed into 3 SGPRs as 2x16bits per component.
 *
 * wg_dim is the number of grid dimensions actually used (1-3);
 * src_is_1d_array / dst_is_1d_array select 1D-array vs 2D-array image types.
 * Returns a pipe compute state object (via create_shader_state).
 */
void *si_create_copy_image_cs(struct si_context *sctx, unsigned wg_dim,
bool src_is_1d_array, bool dst_is_1d_array)
{
const nir_shader_compiler_options *options =
sctx->b.screen->get_compiler_options(sctx->b.screen, PIPE_SHADER_IR_NIR, PIPE_SHADER_COMPUTE);
nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_COMPUTE, options, "copy_image_cs");
b.shader->info.num_images = 2;
/* The workgroup size is either 8x8 for normal (non-linear) 2D images,
 * or 64x1 for 1D and linear-2D images.
 */
b.shader->info.workgroup_size_variable = true;
/* 3 dwords of user data: per component, src coord in the low 16 bits and
 * dst coord in the high 16 bits (packed by the caller).
 */
b.shader->info.cs.user_data_components_amd = 3;
/* Global invocation ID, zero-padded to a 3-component vector. */
nir_def *ids = nir_pad_vector_imm_int(&b, get_global_ids(&b, wg_dim), 0, 3);
nir_def *coord_src = NULL, *coord_dst = NULL;
unpack_2x16(&b, nir_trim_vector(&b, nir_load_user_data_amd(&b), 3),
&coord_src, &coord_dst);
/* Final coords = unpacked box origin + invocation ID. */
coord_src = nir_iadd(&b, coord_src, ids);
coord_dst = nir_iadd(&b, coord_dst, ids);
/* Coordinates must have 4 channels in NIR. */
coord_src = nir_pad_vector(&b, coord_src, 4);
coord_dst = nir_pad_vector(&b, coord_dst, 4);
/* 1D arrays address as (x, layer): select components 0 (x) and 2 (z=layer). */
static unsigned swizzle_xz[] = {0, 2, 0, 0};
if (src_is_1d_array)
coord_src = nir_swizzle(&b, coord_src, swizzle_xz, 4);
if (dst_is_1d_array)
coord_dst = nir_swizzle(&b, coord_dst, swizzle_xz, 4);
/* Both images are declared as arrayed so layered copies work with a single
 * variant; non-array textures are bound with first_layer == last_layer == 0.
 */
const struct glsl_type *src_img_type = glsl_image_type(src_is_1d_array ? GLSL_SAMPLER_DIM_1D
: GLSL_SAMPLER_DIM_2D,
/*is_array*/ true, GLSL_TYPE_FLOAT);
const struct glsl_type *dst_img_type = glsl_image_type(dst_is_1d_array ? GLSL_SAMPLER_DIM_1D
: GLSL_SAMPLER_DIM_2D,
/*is_array*/ true, GLSL_TYPE_FLOAT);
/* Binding 0 = source (read), binding 1 = destination (write); this matches
 * the pipe_image_view setup in si_compute_copy_image_old.
 */
nir_variable *img_src = nir_variable_create(b.shader, nir_var_image, src_img_type, "img_src");
img_src->data.binding = 0;
nir_variable *img_dst = nir_variable_create(b.shader, nir_var_image, dst_img_type, "img_dst");
img_dst->data.binding = 1;
nir_def *undef32 = nir_undef(&b, 1, 32);
nir_def *zero = nir_imm_int(&b, 0);
/* Load one texel from img_src and store it unchanged to img_dst. */
nir_def *data = nir_image_deref_load(&b, /*num_components*/ 4, /*bit_size*/ 32,
deref_ssa(&b, img_src), coord_src, undef32, zero);
nir_image_deref_store(&b, deref_ssa(&b, img_dst), coord_dst, undef32, data, zero);
return create_shader_state(sctx, b.shader);
}
void *si_create_dcc_retile_cs(struct si_context *sctx, struct radeon_surf *surf)
{
nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_COMPUTE, sctx->screen->nir_options,