radeonsi: replace the clear_12bytes_buffer shader with the DMA compute shader

It can handle 12-byte clear values with these trivial changes.

Acked-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/29053>
This commit is contained in:
Marek Olšák 2024-04-27 07:41:38 -04:00 committed by Marge Bot
parent 995e7d927c
commit b771d13557
3 changed files with 5 additions and 53 deletions

View file

@ -285,32 +285,6 @@ void si_compute_clear_buffer_rmw(struct si_context *sctx, struct pipe_resource *
1, &sb, 0x1);
}
static void si_compute_clear_12bytes_buffer(struct si_context *sctx, struct pipe_resource *dst,
unsigned dst_offset, unsigned size,
const uint32_t *clear_value, unsigned flags,
enum si_coherency coher)
{
assert(dst_offset % 4 == 0);
assert(size % 4 == 0);
unsigned size_12 = DIV_ROUND_UP(size, 12);
struct pipe_shader_buffer sb = {0};
sb.buffer = dst;
sb.buffer_offset = dst_offset;
sb.buffer_size = size;
memcpy(sctx->cs_user_data, clear_value, 12);
struct pipe_grid_info info = {0};
set_work_size(&info, 64, 1, 1, size_12, 1, 1);
if (!sctx->cs_clear_12bytes_buffer)
sctx->cs_clear_12bytes_buffer = si_clear_12bytes_buffer_shader(sctx);
si_launch_grid_internal_ssbos(sctx, &info, sctx->cs_clear_12bytes_buffer, flags, coher,
1, &sb, 0x1);
}
static void si_compute_do_clear_or_copy(struct si_context *sctx, struct pipe_resource *dst,
unsigned dst_offset, struct pipe_resource *src,
unsigned src_offset, unsigned size,
@ -325,7 +299,7 @@ static void si_compute_do_clear_or_copy(struct si_context *sctx, struct pipe_res
assert(!src || src_offset + size <= src->width0);
bool is_copy = src != NULL;
unsigned dwords_per_thread = 4;
unsigned dwords_per_thread = clear_value_size == 12 ? 3 : 4;
unsigned num_threads = DIV_ROUND_UP(size, dwords_per_thread * 4);
struct pipe_grid_info info = {};
@ -342,13 +316,14 @@ static void si_compute_do_clear_or_copy(struct si_context *sctx, struct pipe_res
sb[0].buffer_size = size;
} else {
assert(clear_value_size >= 4 && clear_value_size <= 16 &&
util_is_power_of_two_or_zero(clear_value_size));
(clear_value_size == 12 || util_is_power_of_two_or_zero(clear_value_size)));
for (unsigned i = 0; i < 4; i++)
sctx->cs_user_data[i] = clear_value[i % (clear_value_size / 4)];
}
void **shader = is_copy ? &sctx->cs_copy_buffer : &sctx->cs_clear_buffer;
void **shader = is_copy ? &sctx->cs_copy_buffer :
clear_value_size == 12 ? &sctx->cs_clear_12bytes_buffer : &sctx->cs_clear_buffer;
if (!*shader)
*shader = si_create_dma_compute_shader(sctx, dwords_per_thread, !is_copy);
@ -377,11 +352,6 @@ void si_clear_buffer(struct si_context *sctx, struct pipe_resource *dst,
if (util_lower_clearsize_to_dword(clear_value, (int*)&clear_value_size, &clamped))
clear_value = &clamped;
if (clear_value_size == 12) {
si_compute_clear_12bytes_buffer(sctx, dst, offset, size, clear_value, flags, coher);
return;
}
uint64_t aligned_size = size & ~3ull;
if (aligned_size >= 4) {
uint64_t compute_min_size;

View file

@ -1727,7 +1727,6 @@ void *si_create_dma_compute_shader(struct si_context *sctx, unsigned num_dwords_
void *si_create_ubyte_to_ushort_compute_shader(struct si_context *sctx);
void *si_create_clear_buffer_rmw_cs(struct si_context *sctx);
void *si_clear_render_target_shader(struct si_context *sctx, enum pipe_texture_target type);
void *si_clear_12bytes_buffer_shader(struct si_context *sctx);
void *si_create_fmask_expand_cs(struct si_context *sctx, unsigned num_samples, bool is_array);
void *si_create_query_result_cs(struct si_context *sctx);
void *gfx11_create_sh_query_result_cs(struct si_context *sctx);

View file

@ -631,23 +631,6 @@ void *si_clear_image_dcc_single_shader(struct si_context *sctx, bool is_msaa, un
return create_shader_state(sctx, b.shader);
}
void *si_clear_12bytes_buffer_shader(struct si_context *sctx)
{
nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_COMPUTE, sctx->screen->nir_options,
"clear_12bytes_buffer");
b.shader->info.workgroup_size[0] = 64;
b.shader->info.workgroup_size[1] = 1;
b.shader->info.workgroup_size[2] = 1;
b.shader->info.cs.user_data_components_amd = 3;
nir_def *offset = nir_imul_imm(&b, get_global_ids(&b, 1), 12);
nir_def *value = nir_trim_vector(&b, nir_load_user_data_amd(&b), 3);
nir_store_ssbo(&b, value, nir_imm_int(&b, 0), offset);
return create_shader_state(sctx, b.shader);
}
void *si_create_ubyte_to_ushort_compute_shader(struct si_context *sctx)
{
nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_COMPUTE, sctx->screen->nir_options,
@ -672,7 +655,7 @@ void *si_create_ubyte_to_ushort_compute_shader(struct si_context *sctx)
void *si_create_dma_compute_shader(struct si_context *sctx, unsigned num_dwords_per_thread,
bool is_clear)
{
assert(util_is_power_of_two_nonzero(num_dwords_per_thread) && num_dwords_per_thread <= 4);
assert(num_dwords_per_thread && num_dwords_per_thread <= 4);
nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_COMPUTE, sctx->screen->nir_options,
"create_dma_compute");