diff --git a/src/amd/compiler/tests/test_isel.cpp b/src/amd/compiler/tests/test_isel.cpp index 4a1bf9f9cf8..5b8113405c8 100644 --- a/src/amd/compiler/tests/test_isel.cpp +++ b/src/amd/compiler/tests/test_isel.cpp @@ -23,14 +23,14 @@ BEGIN_TEST(isel.interp.simple) layout(location = 0) in vec4 in_color; layout(location = 0) out vec4 out_color; void main() { - //>> v1: %b_tmp = v_interp_p1_f32 %bx, %pm:m0 attr0.z - //! v1: %b = v_interp_p2_f32 %by, %pm:m0, (kill)%b_tmp attr0.z - //! v1: %a_tmp = v_interp_p1_f32 %bx, %pm:m0 attr0.w + //>> v1: %a_tmp = v_interp_p1_f32 %bx, %pm:m0 attr0.w //! v1: %a = v_interp_p2_f32 %by, %pm:m0, (kill)%a_tmp attr0.w //! v1: %r_tmp = v_interp_p1_f32 %bx, %pm:m0 attr0.x //! v1: %r = v_interp_p2_f32 %by, %pm:m0, (kill)%r_tmp attr0.x - //! v1: %g_tmp = v_interp_p1_f32 (kill)%bx, %pm:m0 attr0.y - //! v1: %g = v_interp_p2_f32 (kill)%by, (kill)%pm:m0, (kill)%g_tmp attr0.y + //! v1: %g_tmp = v_interp_p1_f32 %bx, %pm:m0 attr0.y + //! v1: %g = v_interp_p2_f32 %by, %pm:m0, (kill)%g_tmp attr0.y + //! v1: %b_tmp = v_interp_p1_f32 (kill)%bx, %pm:m0 attr0.z + //! v1: %b = v_interp_p2_f32 (kill)%by, (kill)%pm:m0, (kill)%b_tmp attr0.z //! 
exp (kill)%r, (kill)%g, (kill)%b, (kill)%a mrt0 out_color = in_color; } diff --git a/src/amd/vulkan/meson.build b/src/amd/vulkan/meson.build index 311cd8fc063..ddb9b56d3d6 100644 --- a/src/amd/vulkan/meson.build +++ b/src/amd/vulkan/meson.build @@ -92,6 +92,7 @@ libradv_files = files( 'nir/radv_nir_rt_stage_cps.c', 'nir/radv_nir_rt_stage_monolithic.c', 'nir/radv_nir_rt_traversal_shader.c', + 'nir/radv_nir_trim_fs_color_exports.c', 'radv_acceleration_structure.c', 'radv_android.c', 'radv_android.h', diff --git a/src/amd/vulkan/nir/radv_nir.h b/src/amd/vulkan/nir/radv_nir.h index 5ea62928f25..d7a23ec5188 100644 --- a/src/amd/vulkan/nir/radv_nir.h +++ b/src/amd/vulkan/nir/radv_nir.h @@ -78,6 +78,8 @@ bool radv_nir_lower_draw_id_to_zero(nir_shader *shader); bool radv_nir_remap_color_attachment(nir_shader *shader, const struct radv_graphics_state_key *gfx_state); +bool radv_nir_trim_fs_color_exports(nir_shader *shader, uint32_t colors_needed); + bool radv_nir_lower_printf(nir_shader *shader); typedef struct radv_nir_opt_tid_function_options { diff --git a/src/amd/vulkan/nir/radv_nir_trim_fs_color_exports.c b/src/amd/vulkan/nir/radv_nir_trim_fs_color_exports.c new file mode 100644 index 00000000000..da4da0e993a --- /dev/null +++ b/src/amd/vulkan/nir/radv_nir_trim_fs_color_exports.c @@ -0,0 +1,48 @@ +/* + * Copyright © 2025 Valve Corporation + * + * SPDX-License-Identifier: MIT + */ + +#include "nir/nir.h" +#include "nir/nir_builder.h" +#include "radv_constants.h" +#include "radv_nir.h" + /* Callback given to nir_shader_intrinsics_pass: for a store_output intrinsic it clears the write-mask bits whose component is not set in the colors_needed mask that state points to (4 bits per color index), and removes the store when no bit is left. Returns true only if the store was changed or removed, false otherwise. The builder b is not used. */ +static bool +trim_fs_color_exports(nir_builder *b, nir_intrinsic_instr *intrin, void *state) +{ + const uint32_t colors_needed = *(uint32_t *)state; /* state is the address of the uint32_t mask passed in by radv_nir_trim_fs_color_exports */ + + if (intrin->intrinsic != nir_intrinsic_store_output) + return false; + + nir_io_semantics io_sem = nir_intrinsic_io_semantics(intrin); + + int index = mesa_frag_result_get_color_index(io_sem.location); /* NOTE(review): a negative index presumably means the location is not a color output; such stores are left untouched below - confirm against the helper */ + + if (index < 0) + return false; + + const unsigned needed = (colors_needed >> (index * 4) & 0xf) >> nir_intrinsic_component(intrin); /* 4-bit group for this color index, shifted down by the component offset of the store before it is ANDed with the write mask */ 
+ + const unsigned write_mask = nir_intrinsic_write_mask(intrin); + + const unsigned new_write_mask = write_mask & needed; + + if (new_write_mask == write_mask) + return false; /* nothing to trim, report no progress */ + + if (!new_write_mask) + nir_instr_remove(&intrin->instr); /* none of the written components is needed: drop the whole store */ + else + nir_intrinsic_set_write_mask(intrin, new_write_mask); + + return true; +} + /* Pass entry point: hands trim_fs_color_exports to nir_shader_intrinsics_pass for shader, with the address of colors_needed as the callback state and nir_metadata_control_flow as the metadata argument. colors_needed is read 4 bits per color index by the callback. Returns the result of nir_shader_intrinsics_pass. */ +bool +radv_nir_trim_fs_color_exports(nir_shader *shader, uint32_t colors_needed) +{ + return nir_shader_intrinsics_pass(shader, trim_fs_color_exports, nir_metadata_control_flow, &colors_needed); +} diff --git a/src/amd/vulkan/radv_pipeline_graphics.c b/src/amd/vulkan/radv_pipeline_graphics.c index ace60c654d6..b8a2b24f9ac 100644 --- a/src/amd/vulkan/radv_pipeline_graphics.c +++ b/src/amd/vulkan/radv_pipeline_graphics.c @@ -1857,6 +1857,13 @@ radv_generate_ps_epilog_key(const struct radv_device *device, const struct radv_ cf = radv_choose_spi_color_format(device, fmt, blend_enable, state->need_src_alpha & (1 << i)); + uint32_t comp_used = util_format_colormask(vk_format_description(fmt)); + + comp_used &= (state->color_write_mask >> (i * 4)); + comp_used |= ((state->need_src_alpha >> i) & 0x1) << 3; + + key.colors_needed |= comp_used << (4 * i); + if (format_is_int8(fmt)) is_int8 |= 1 << i; if (format_is_int10(fmt)) @@ -1876,6 +1883,7 @@ radv_generate_ps_epilog_key(const struct radv_device *device, const struct radv_ */ col_format |= V_028714_SPI_SHADER_32_AR; key.color_map[0] = 0; + key.colors_needed |= 0x8; } /* The output for dual source blending should have the same format as the first output. 
*/ @@ -1883,6 +1891,7 @@ radv_generate_ps_epilog_key(const struct radv_device *device, const struct radv_ assert(!(col_format >> 4)); col_format |= (col_format & 0xf) << 4; key.color_map[1] = 1; + key.colors_needed |= (key.colors_needed & 0xf) << 4; } z_format = ac_get_spi_shader_z_format(state->export_depth, state->export_stencil, state->export_sample_mask, @@ -1893,7 +1902,7 @@ radv_generate_ps_epilog_key(const struct radv_device *device, const struct radv_ key.color_is_int10 = pdev->info.has_cb_lt16bit_int_clamp_bug ? is_int10 : 0; key.enable_mrt_output_nan_fixup = instance->drirc.debug.enable_mrt_output_nan_fixup ? is_float32 : 0; key.colors_written = state->colors_written; - key.mrt0_is_dual_src = state->mrt0_is_dual_src; + key.mrt0_is_dual_src = state->mrt0_is_dual_src && key.colors_needed & 0xf; key.export_depth = state->export_depth; key.export_stencil = state->export_stencil; key.export_sample_mask = state->export_sample_mask; @@ -2898,6 +2907,19 @@ radv_graphics_shaders_compile(struct radv_device *device, struct vk_pipeline_cac if (!gfx_state->ps.has_epilog) { NIR_PASS(update_info, stages[MESA_SHADER_FRAGMENT].nir, radv_nir_remap_color_attachment, gfx_state); + + /* Lower FS outputs to scalar to allow dce. */ + NIR_PASS(_, stages[MESA_SHADER_FRAGMENT].nir, nir_lower_io_to_scalar, nir_var_shader_out, NULL, NULL); + + /* TODO it seems like some internal shaders use render target formats with too few components. 
*/ + if (!stages[MESA_SHADER_FRAGMENT].nir->info.internal) { + NIR_PASS(update_info, stages[MESA_SHADER_FRAGMENT].nir, radv_nir_trim_fs_color_exports, + gfx_state->ps.epilog.colors_needed); + } + + NIR_PASS(update_info, stages[MESA_SHADER_FRAGMENT].nir, nir_opt_copy_prop); + NIR_PASS(update_info, stages[MESA_SHADER_FRAGMENT].nir, nir_opt_dce); + NIR_PASS(update_info, stages[MESA_SHADER_FRAGMENT].nir, nir_opt_dead_cf); } NIR_PASS(update_info, stages[MESA_SHADER_FRAGMENT].nir, radv_nir_lower_fs_input_attachment); diff --git a/src/amd/vulkan/radv_shader.h b/src/amd/vulkan/radv_shader.h index 7d607cc1ac1..22918fffb1c 100644 --- a/src/amd/vulkan/radv_shader.h +++ b/src/amd/vulkan/radv_shader.h @@ -102,6 +102,8 @@ struct radv_ps_epilog_key { uint8_t color_is_int10; uint8_t enable_mrt_output_nan_fixup; + uint32_t colors_needed; + uint32_t colors_written; uint8_t color_map[MAX_RTS]; bool mrt0_is_dual_src;