mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-05 00:58:05 +02:00
radeonsi: replace the clear_12bytes_buffer shader with the DMA compute shader
It can handle 12-byte clear values with these trivial changes. Acked-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/29053>
This commit is contained in:
parent
995e7d927c
commit
b771d13557
3 changed files with 5 additions and 53 deletions
|
|
@ -285,32 +285,6 @@ void si_compute_clear_buffer_rmw(struct si_context *sctx, struct pipe_resource *
|
|||
1, &sb, 0x1);
|
||||
}
|
||||
|
||||
static void si_compute_clear_12bytes_buffer(struct si_context *sctx, struct pipe_resource *dst,
|
||||
unsigned dst_offset, unsigned size,
|
||||
const uint32_t *clear_value, unsigned flags,
|
||||
enum si_coherency coher)
|
||||
{
|
||||
assert(dst_offset % 4 == 0);
|
||||
assert(size % 4 == 0);
|
||||
unsigned size_12 = DIV_ROUND_UP(size, 12);
|
||||
|
||||
struct pipe_shader_buffer sb = {0};
|
||||
sb.buffer = dst;
|
||||
sb.buffer_offset = dst_offset;
|
||||
sb.buffer_size = size;
|
||||
|
||||
memcpy(sctx->cs_user_data, clear_value, 12);
|
||||
|
||||
struct pipe_grid_info info = {0};
|
||||
set_work_size(&info, 64, 1, 1, size_12, 1, 1);
|
||||
|
||||
if (!sctx->cs_clear_12bytes_buffer)
|
||||
sctx->cs_clear_12bytes_buffer = si_clear_12bytes_buffer_shader(sctx);
|
||||
|
||||
si_launch_grid_internal_ssbos(sctx, &info, sctx->cs_clear_12bytes_buffer, flags, coher,
|
||||
1, &sb, 0x1);
|
||||
}
|
||||
|
||||
static void si_compute_do_clear_or_copy(struct si_context *sctx, struct pipe_resource *dst,
|
||||
unsigned dst_offset, struct pipe_resource *src,
|
||||
unsigned src_offset, unsigned size,
|
||||
|
|
@ -325,7 +299,7 @@ static void si_compute_do_clear_or_copy(struct si_context *sctx, struct pipe_res
|
|||
assert(!src || src_offset + size <= src->width0);
|
||||
|
||||
bool is_copy = src != NULL;
|
||||
unsigned dwords_per_thread = 4;
|
||||
unsigned dwords_per_thread = clear_value_size == 12 ? 3 : 4;
|
||||
unsigned num_threads = DIV_ROUND_UP(size, dwords_per_thread * 4);
|
||||
|
||||
struct pipe_grid_info info = {};
|
||||
|
|
@ -342,13 +316,14 @@ static void si_compute_do_clear_or_copy(struct si_context *sctx, struct pipe_res
|
|||
sb[0].buffer_size = size;
|
||||
} else {
|
||||
assert(clear_value_size >= 4 && clear_value_size <= 16 &&
|
||||
util_is_power_of_two_or_zero(clear_value_size));
|
||||
(clear_value_size == 12 || util_is_power_of_two_or_zero(clear_value_size)));
|
||||
|
||||
for (unsigned i = 0; i < 4; i++)
|
||||
sctx->cs_user_data[i] = clear_value[i % (clear_value_size / 4)];
|
||||
}
|
||||
|
||||
void **shader = is_copy ? &sctx->cs_copy_buffer : &sctx->cs_clear_buffer;
|
||||
void **shader = is_copy ? &sctx->cs_copy_buffer :
|
||||
clear_value_size == 12 ? &sctx->cs_clear_12bytes_buffer : &sctx->cs_clear_buffer;
|
||||
if (!*shader)
|
||||
*shader = si_create_dma_compute_shader(sctx, dwords_per_thread, !is_copy);
|
||||
|
||||
|
|
@ -377,11 +352,6 @@ void si_clear_buffer(struct si_context *sctx, struct pipe_resource *dst,
|
|||
if (util_lower_clearsize_to_dword(clear_value, (int*)&clear_value_size, &clamped))
|
||||
clear_value = &clamped;
|
||||
|
||||
if (clear_value_size == 12) {
|
||||
si_compute_clear_12bytes_buffer(sctx, dst, offset, size, clear_value, flags, coher);
|
||||
return;
|
||||
}
|
||||
|
||||
uint64_t aligned_size = size & ~3ull;
|
||||
if (aligned_size >= 4) {
|
||||
uint64_t compute_min_size;
|
||||
|
|
|
|||
|
|
@ -1727,7 +1727,6 @@ void *si_create_dma_compute_shader(struct si_context *sctx, unsigned num_dwords_
|
|||
void *si_create_ubyte_to_ushort_compute_shader(struct si_context *sctx);
|
||||
void *si_create_clear_buffer_rmw_cs(struct si_context *sctx);
|
||||
void *si_clear_render_target_shader(struct si_context *sctx, enum pipe_texture_target type);
|
||||
void *si_clear_12bytes_buffer_shader(struct si_context *sctx);
|
||||
void *si_create_fmask_expand_cs(struct si_context *sctx, unsigned num_samples, bool is_array);
|
||||
void *si_create_query_result_cs(struct si_context *sctx);
|
||||
void *gfx11_create_sh_query_result_cs(struct si_context *sctx);
|
||||
|
|
|
|||
|
|
@ -631,23 +631,6 @@ void *si_clear_image_dcc_single_shader(struct si_context *sctx, bool is_msaa, un
|
|||
return create_shader_state(sctx, b.shader);
|
||||
}
|
||||
|
||||
void *si_clear_12bytes_buffer_shader(struct si_context *sctx)
|
||||
{
|
||||
nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_COMPUTE, sctx->screen->nir_options,
|
||||
"clear_12bytes_buffer");
|
||||
b.shader->info.workgroup_size[0] = 64;
|
||||
b.shader->info.workgroup_size[1] = 1;
|
||||
b.shader->info.workgroup_size[2] = 1;
|
||||
b.shader->info.cs.user_data_components_amd = 3;
|
||||
|
||||
nir_def *offset = nir_imul_imm(&b, get_global_ids(&b, 1), 12);
|
||||
nir_def *value = nir_trim_vector(&b, nir_load_user_data_amd(&b), 3);
|
||||
|
||||
nir_store_ssbo(&b, value, nir_imm_int(&b, 0), offset);
|
||||
|
||||
return create_shader_state(sctx, b.shader);
|
||||
}
|
||||
|
||||
void *si_create_ubyte_to_ushort_compute_shader(struct si_context *sctx)
|
||||
{
|
||||
nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_COMPUTE, sctx->screen->nir_options,
|
||||
|
|
@ -672,7 +655,7 @@ void *si_create_ubyte_to_ushort_compute_shader(struct si_context *sctx)
|
|||
void *si_create_dma_compute_shader(struct si_context *sctx, unsigned num_dwords_per_thread,
|
||||
bool is_clear)
|
||||
{
|
||||
assert(util_is_power_of_two_nonzero(num_dwords_per_thread) && num_dwords_per_thread <= 4);
|
||||
assert(num_dwords_per_thread && num_dwords_per_thread <= 4);
|
||||
|
||||
nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_COMPUTE, sctx->screen->nir_options,
|
||||
"create_dma_compute");
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue