radv: eliminate unused FS output channels

For formats that don't have all color channels, there is no reason to
output all of them.
Games often write to R-only or RGB formats while still computing non-trivial values for the remaining channels.

Foz-DB Navi21:
Totals from 10270 (10.55% of 97347) affected shaders:
MaxWaves: 249166 -> 250950 (+0.72%); split: +0.73%, -0.01%
Instrs: 8442016 -> 8354715 (-1.03%); split: -1.05%, +0.01%
CodeSize: 45939644 -> 45487156 (-0.98%); split: -1.01%, +0.02%
VGPRs: 472584 -> 463784 (-1.86%); split: -1.98%, +0.12%
SpillSGPRs: 1502 -> 1448 (-3.60%)
LDS: 6024192 -> 6011904 (-0.20%)
Inputs: 42463 -> 41773 (-1.62%)
Outputs: 24601 -> 23955 (-2.63%)
Latency: 78011745 -> 77653907 (-0.46%); split: -0.56%, +0.10%
InvThroughput: 19767826 -> 19274046 (-2.50%); split: -2.53%, +0.03%
VClause: 177891 -> 176681 (-0.68%); split: -0.80%, +0.12%
SClause: 236784 -> 235324 (-0.62%); split: -0.72%, +0.10%
Copies: 621048 -> 616096 (-0.80%); split: -1.03%, +0.23%
Branches: 202608 -> 201811 (-0.39%); split: -0.44%, +0.05%
PreSGPRs: 441032 -> 437698 (-0.76%); split: -0.77%, +0.01%
PreVGPRs: 378067 -> 369564 (-2.25%); split: -2.26%, +0.01%
VALU: 5906415 -> 5833179 (-1.24%); split: -1.25%, +0.01%
SALU: 973428 -> 968088 (-0.55%); split: -0.61%, +0.06%
VMEM: 298277 -> 296504 (-0.59%); split: -0.61%, +0.01%
SMEM: 402244 -> 399612 (-0.65%); split: -0.71%, +0.06%

Reviewed-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/38853>
This commit is contained in:
Georg Lehmann 2025-12-07 18:22:15 +01:00 committed by Marge Bot
parent 5d2f3065fd
commit 17e597093d
6 changed files with 81 additions and 6 deletions

View file

@ -23,14 +23,14 @@ BEGIN_TEST(isel.interp.simple)
layout(location = 0) in vec4 in_color;
layout(location = 0) out vec4 out_color;
void main() {
//>> v1: %b_tmp = v_interp_p1_f32 %bx, %pm:m0 attr0.z
//! v1: %b = v_interp_p2_f32 %by, %pm:m0, (kill)%b_tmp attr0.z
//! v1: %a_tmp = v_interp_p1_f32 %bx, %pm:m0 attr0.w
//>> v1: %a_tmp = v_interp_p1_f32 %bx, %pm:m0 attr0.w
//! v1: %a = v_interp_p2_f32 %by, %pm:m0, (kill)%a_tmp attr0.w
//! v1: %r_tmp = v_interp_p1_f32 %bx, %pm:m0 attr0.x
//! v1: %r = v_interp_p2_f32 %by, %pm:m0, (kill)%r_tmp attr0.x
//! v1: %g_tmp = v_interp_p1_f32 (kill)%bx, %pm:m0 attr0.y
//! v1: %g = v_interp_p2_f32 (kill)%by, (kill)%pm:m0, (kill)%g_tmp attr0.y
//! v1: %g_tmp = v_interp_p1_f32 %bx, %pm:m0 attr0.y
//! v1: %g = v_interp_p2_f32 %by, %pm:m0, (kill)%g_tmp attr0.y
//! v1: %b_tmp = v_interp_p1_f32 (kill)%bx, %pm:m0 attr0.z
//! v1: %b = v_interp_p2_f32 (kill)%by, (kill)%pm:m0, (kill)%b_tmp attr0.z
//! exp (kill)%r, (kill)%g, (kill)%b, (kill)%a mrt0
out_color = in_color;
}

View file

@ -92,6 +92,7 @@ libradv_files = files(
'nir/radv_nir_rt_stage_cps.c',
'nir/radv_nir_rt_stage_monolithic.c',
'nir/radv_nir_rt_traversal_shader.c',
'nir/radv_nir_trim_fs_color_exports.c',
'radv_acceleration_structure.c',
'radv_android.c',
'radv_android.h',

View file

@ -78,6 +78,8 @@ bool radv_nir_lower_draw_id_to_zero(nir_shader *shader);
bool radv_nir_remap_color_attachment(nir_shader *shader, const struct radv_graphics_state_key *gfx_state);
bool radv_nir_trim_fs_color_exports(nir_shader *shader, uint32_t colors_needed);
bool radv_nir_lower_printf(nir_shader *shader);
typedef struct radv_nir_opt_tid_function_options {

View file

@ -0,0 +1,48 @@
/*
* Copyright © 2025 Valve Corporation
*
* SPDX-License-Identifier: MIT
*/
#include "nir/nir.h"
#include "nir/nir_builder.h"
#include "radv_constants.h"
#include "radv_nir.h"
/*
 * Per-intrinsic callback that narrows fragment shader color stores to the
 * channels that are actually needed.
 *
 * `state` points to a uint32_t holding 4 bits per color output index
 * (bits [4 * index, 4 * index + 3]); a set bit means the corresponding
 * channel of that output is needed. Only store_output intrinsics whose
 * location maps to a color index (mesa_frag_result_get_color_index() >= 0)
 * are considered; everything else is left untouched.
 *
 * Returns true when the store was removed or had its write mask reduced,
 * false when nothing changed.
 */
static bool
trim_fs_color_exports(nir_builder *b, nir_intrinsic_instr *intrin, void *state)
{
   if (intrin->intrinsic != nir_intrinsic_store_output)
      return false;

   const nir_io_semantics sem = nir_intrinsic_io_semantics(intrin);
   const int color_index = mesa_frag_result_get_color_index(sem.location);
   if (color_index < 0)
      return false;

   /* Pick out the 4-bit channel mask that belongs to this color output. */
   const uint32_t all_needed = *(uint32_t *)state;
   const unsigned output_channels = (all_needed >> (color_index * 4)) & 0xf;

   /* Shift by the store's first component so the needed bits line up with
    * the bits of the store's write mask.
    */
   const unsigned store_channels = output_channels >> nir_intrinsic_component(intrin);

   const unsigned old_mask = nir_intrinsic_write_mask(intrin);
   const unsigned kept_mask = old_mask & store_channels;

   /* Every written channel is needed: nothing to trim. */
   if (kept_mask == old_mask)
      return false;

   if (kept_mask) {
      nir_intrinsic_set_write_mask(intrin, kept_mask);
   } else {
      /* No written channel is needed, so the whole store can go. */
      nir_instr_remove(&intrin->instr);
   }

   return true;
}
/*
 * Runs trim_fs_color_exports() over every intrinsic in `shader`.
 *
 * `colors_needed` carries 4 bits per color output index; a set bit marks a
 * channel whose store must be kept. The address of the local copy is handed
 * to the callback as its state, and nir_metadata_control_flow is passed as
 * the pass's metadata argument.
 *
 * Returns the result of nir_shader_intrinsics_pass(), i.e. whether any store
 * was trimmed or removed.
 */
bool
radv_nir_trim_fs_color_exports(nir_shader *shader, uint32_t colors_needed)
{
   const bool progress =
      nir_shader_intrinsics_pass(shader, trim_fs_color_exports, nir_metadata_control_flow, &colors_needed);

   return progress;
}

View file

@ -1857,6 +1857,13 @@ radv_generate_ps_epilog_key(const struct radv_device *device, const struct radv_
cf = radv_choose_spi_color_format(device, fmt, blend_enable, state->need_src_alpha & (1 << i));
uint32_t comp_used = util_format_colormask(vk_format_description(fmt));
comp_used &= (state->color_write_mask >> (i * 4));
comp_used |= ((state->need_src_alpha >> i) & 0x1) << 3;
key.colors_needed |= comp_used << (4 * i);
if (format_is_int8(fmt))
is_int8 |= 1 << i;
if (format_is_int10(fmt))
@ -1876,6 +1883,7 @@ radv_generate_ps_epilog_key(const struct radv_device *device, const struct radv_
*/
col_format |= V_028714_SPI_SHADER_32_AR;
key.color_map[0] = 0;
key.colors_needed |= 0x8;
}
/* The output for dual source blending should have the same format as the first output. */
@ -1883,6 +1891,7 @@ radv_generate_ps_epilog_key(const struct radv_device *device, const struct radv_
assert(!(col_format >> 4));
col_format |= (col_format & 0xf) << 4;
key.color_map[1] = 1;
key.colors_needed |= (key.colors_needed & 0xf) << 4;
}
z_format = ac_get_spi_shader_z_format(state->export_depth, state->export_stencil, state->export_sample_mask,
@ -1893,7 +1902,7 @@ radv_generate_ps_epilog_key(const struct radv_device *device, const struct radv_
key.color_is_int10 = pdev->info.has_cb_lt16bit_int_clamp_bug ? is_int10 : 0;
key.enable_mrt_output_nan_fixup = instance->drirc.debug.enable_mrt_output_nan_fixup ? is_float32 : 0;
key.colors_written = state->colors_written;
key.mrt0_is_dual_src = state->mrt0_is_dual_src;
key.mrt0_is_dual_src = state->mrt0_is_dual_src && key.colors_needed & 0xf;
key.export_depth = state->export_depth;
key.export_stencil = state->export_stencil;
key.export_sample_mask = state->export_sample_mask;
@ -2898,6 +2907,19 @@ radv_graphics_shaders_compile(struct radv_device *device, struct vk_pipeline_cac
if (!gfx_state->ps.has_epilog) {
NIR_PASS(update_info, stages[MESA_SHADER_FRAGMENT].nir, radv_nir_remap_color_attachment, gfx_state);
/* Lower FS outputs to scalar to allow dce. */
NIR_PASS(_, stages[MESA_SHADER_FRAGMENT].nir, nir_lower_io_to_scalar, nir_var_shader_out, NULL, NULL);
/* TODO it seems like some internal shaders use render target formats with too few components. */
if (!stages[MESA_SHADER_FRAGMENT].nir->info.internal) {
NIR_PASS(update_info, stages[MESA_SHADER_FRAGMENT].nir, radv_nir_trim_fs_color_exports,
gfx_state->ps.epilog.colors_needed);
}
NIR_PASS(update_info, stages[MESA_SHADER_FRAGMENT].nir, nir_opt_copy_prop);
NIR_PASS(update_info, stages[MESA_SHADER_FRAGMENT].nir, nir_opt_dce);
NIR_PASS(update_info, stages[MESA_SHADER_FRAGMENT].nir, nir_opt_dead_cf);
}
NIR_PASS(update_info, stages[MESA_SHADER_FRAGMENT].nir, radv_nir_lower_fs_input_attachment);

View file

@ -102,6 +102,8 @@ struct radv_ps_epilog_key {
uint8_t color_is_int10;
uint8_t enable_mrt_output_nan_fixup;
uint32_t colors_needed;
uint32_t colors_written;
uint8_t color_map[MAX_RTS];
bool mrt0_is_dual_src;