From f156abd2a76862aeba3cda8bdad40adecb32a48d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Sat, 28 Dec 2024 14:58:11 -0500 Subject: [PATCH] radeonsi: simplify how broadcast_last_cbuf is implemented for PS epilogs We don't need to look at the framebuffer state and record how many color buffers to write. Instead, we can deduce which color buffers are enabled from spi_shader_col_format, which already does the right thing. So PS epilogs only need a single bool flag that determines whether all enabled color buffers should be written. Reviewed-by: Pierre-Eric Pelloux-Prayer Part-of: --- src/gallium/drivers/radeonsi/si_shader.c | 4 +++- src/gallium/drivers/radeonsi/si_shader.h | 2 +- src/gallium/drivers/radeonsi/si_shader_aco.c | 2 +- src/gallium/drivers/radeonsi/si_shader_info.c | 9 +++++---- src/gallium/drivers/radeonsi/si_shader_llvm_ps.c | 14 ++++++++------ src/gallium/drivers/radeonsi/si_state_shaders.cpp | 8 +------- 6 files changed, 19 insertions(+), 20 deletions(-) diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c index 2df02422b15..e56be7a3154 100644 --- a/src/gallium/drivers/radeonsi/si_shader.c +++ b/src/gallium/drivers/radeonsi/si_shader.c @@ -1578,7 +1578,6 @@ static void si_dump_shader_key(const struct si_shader *shader, FILE *f) key->ps.part.epilog.spi_shader_col_format); fprintf(f, " epilog.color_is_int8 = 0x%X\n", key->ps.part.epilog.color_is_int8); fprintf(f, " epilog.color_is_int10 = 0x%X\n", key->ps.part.epilog.color_is_int10); - fprintf(f, " epilog.last_cbuf = %u\n", key->ps.part.epilog.last_cbuf); fprintf(f, " epilog.alpha_func = %u\n", key->ps.part.epilog.alpha_func); fprintf(f, " epilog.alpha_to_one = %u\n", key->ps.part.epilog.alpha_to_one); fprintf(f, " epilog.alpha_to_coverage_via_mrtz = %u\n", key->ps.part.epilog.alpha_to_coverage_via_mrtz); @@ -3457,6 +3456,9 @@ static void si_get_ps_epilog_key(struct si_shader *shader, union si_shader_part_ key->ps_epilog.uses_discard = si_shader_uses_discard(shader); key->ps_epilog.colors_written = info->colors_written; key->ps_epilog.color_types = info->output_color_types; + key->ps_epilog.writes_all_cbufs = info->color0_writes_all_cbufs && + /* Check whether a non-zero color buffer is bound. */ + !!(shader->key.ps.part.epilog.spi_shader_col_format & 0xfffffff0); key->ps_epilog.writes_z = info->writes_z; key->ps_epilog.writes_stencil = info->writes_stencil; key->ps_epilog.writes_samplemask = info->writes_samplemask; diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h index d8b9d1f7bb0..7f193ec9cbd 100644 --- a/src/gallium/drivers/radeonsi/si_shader.h +++ b/src/gallium/drivers/radeonsi/si_shader.h @@ -684,7 +684,6 @@ struct si_ps_epilog_bits { unsigned spi_shader_col_format; unsigned color_is_int8 : 8; unsigned color_is_int10 : 8; - unsigned last_cbuf : 3; unsigned alpha_func : 3; unsigned alpha_to_one : 1; unsigned alpha_to_coverage_via_mrtz : 1; /* gfx11+ or alpha_to_one */ @@ -718,6 +717,7 @@ union si_shader_part_key { unsigned uses_discard : 1; unsigned colors_written : 8; unsigned color_types : 16; + unsigned writes_all_cbufs : 1; unsigned writes_z : 1; unsigned writes_stencil : 1; unsigned writes_samplemask : 1; diff --git a/src/gallium/drivers/radeonsi/si_shader_aco.c b/src/gallium/drivers/radeonsi/si_shader_aco.c index 59734f3b89b..14706551366 100644 --- a/src/gallium/drivers/radeonsi/si_shader_aco.c +++ b/src/gallium/drivers/radeonsi/si_shader_aco.c @@ -311,7 +311,7 @@ si_aco_build_ps_epilog(struct aco_compiler_options *options, .spi_shader_col_format = key->ps_epilog.states.spi_shader_col_format, .color_is_int8 = key->ps_epilog.states.color_is_int8, .color_is_int10 = key->ps_epilog.states.color_is_int10, - .writes_all_cbufs = key->ps_epilog.states.last_cbuf != 0, + .writes_all_cbufs = key->ps_epilog.writes_all_cbufs, .alpha_func = key->ps_epilog.states.alpha_func, .alpha_to_one = key->ps_epilog.states.alpha_to_one, .alpha_to_coverage_via_mrtz = key->ps_epilog.states.alpha_to_coverage_via_mrtz, diff --git a/src/gallium/drivers/radeonsi/si_shader_info.c b/src/gallium/drivers/radeonsi/si_shader_info.c index 9725de1e818..aa63ef39b30 100644 --- a/src/gallium/drivers/radeonsi/si_shader_info.c +++ b/src/gallium/drivers/radeonsi/si_shader_info.c @@ -636,12 +636,13 @@ void si_nir_scan_shader(struct si_screen *sscreen, struct nir_shader *nir, info->writes_samplemask = nir->info.outputs_written & BITFIELD64_BIT(FRAG_RESULT_SAMPLE_MASK); info->colors_written = nir->info.outputs_written >> FRAG_RESULT_DATA0; - if (nir->info.outputs_written & BITFIELD64_BIT(FRAG_RESULT_COLOR)) { - info->color0_writes_all_cbufs = true; - info->colors_written |= 0x1; - } if (nir->info.fs.color_is_dual_source) info->colors_written |= 0x2; + if (nir->info.outputs_written & BITFIELD64_BIT(FRAG_RESULT_COLOR)) { + info->colors_written |= 0x1; + info->color0_writes_all_cbufs = info->colors_written == 0x1; + + } } else { info->writes_primid = nir->info.outputs_written & VARYING_BIT_PRIMITIVE_ID; info->writes_viewport_index = nir->info.outputs_written & VARYING_BIT_VIEWPORT; diff --git a/src/gallium/drivers/radeonsi/si_shader_llvm_ps.c b/src/gallium/drivers/radeonsi/si_shader_llvm_ps.c index 252da11fe66..b555d17e604 100644 --- a/src/gallium/drivers/radeonsi/si_shader_llvm_ps.c +++ b/src/gallium/drivers/radeonsi/si_shader_llvm_ps.c @@ -309,14 +309,15 @@ static void si_llvm_build_clamp_alpha_test(struct si_shader_context *ctx, static void si_export_mrt_color(struct si_shader_context *ctx, LLVMValueRef *color, unsigned index, unsigned first_color_export, unsigned color_type, - struct si_ps_exports *exp) + bool writes_all_cbufs, struct si_ps_exports *exp) { - /* If last_cbuf > 0, FS_COLOR0_WRITES_ALL_CBUFS is true. */ - if (ctx->shader->key.ps.part.epilog.last_cbuf > 0) { + if (writes_all_cbufs) { assert(exp->num == first_color_export); - /* Get the export arguments, also find out what the last one is. */ - for (int c = 0; c <= ctx->shader->key.ps.part.epilog.last_cbuf; c++) { + /* This will do nothing for color buffers with SPI_SHADER_COL_FORMAT=ZERO, so always + * iterate over all 8. + */ + for (int c = 0; c < 8; c++) { if (si_llvm_init_ps_export_args(ctx, color, c, exp->num - first_color_export, color_type, &exp->args[exp->num])) { assert(exp->args[exp->num].enabled_channels); @@ -753,7 +754,8 @@ void si_llvm_build_ps_epilog(struct si_shader_context *ctx, union si_shader_part int write_i = u_bit_scan(&colors_written); unsigned color_type = (key->ps_epilog.color_types >> (write_i * 2)) & 0x3; - si_export_mrt_color(ctx, color[write_i], write_i, first_color_export, color_type, &exp); + si_export_mrt_color(ctx, color[write_i], write_i, first_color_export, color_type, + key->ps_epilog.writes_all_cbufs, &exp); } if (exp.num) { diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.cpp b/src/gallium/drivers/radeonsi/si_state_shaders.cpp index 4286e1f3908..20d4e07998a 100644 --- a/src/gallium/drivers/radeonsi/si_state_shaders.cpp +++ b/src/gallium/drivers/radeonsi/si_state_shaders.cpp @@ -2602,12 +2602,6 @@ void si_ps_key_update_framebuffer(struct si_context *sctx) if (!sel) return; - if (sel->info.color0_writes_all_cbufs && - sel->info.colors_written == 0x1) - key->ps.part.epilog.last_cbuf = MAX2(sctx->framebuffer.state.nr_cbufs, 1) - 1; - else - key->ps.part.epilog.last_cbuf = 0; - /* ps_uses_fbfetch is true only if the color buffer is bound. */ if (sctx->ps_uses_fbfetch) { struct pipe_surface *cb0 = sctx->framebuffer.state.cbufs[0]; @@ -2728,7 +2722,7 @@ void si_ps_key_update_framebuffer_blend_dsa_rasterizer(struct si_context *sctx) } /* Disable unwritten outputs (if WRITE_ALL_CBUFS isn't enabled). */ - if (!key->ps.part.epilog.last_cbuf) { + if (!sel->info.color0_writes_all_cbufs) { key->ps.part.epilog.spi_shader_col_format &= sel->info.colors_written_4bit; key->ps.part.epilog.color_is_int8 &= sel->info.colors_written; key->ps.part.epilog.color_is_int10 &= sel->info.colors_written;