Merge branch 'radv-fs-output-nsz' into 'main'

nir,radv: remove signed zero preserve based on FS output format or blending

See merge request mesa/mesa!40323
This commit is contained in:
Georg Lehmann 2026-03-11 05:54:47 +01:00
commit b5bd9ef6aa
8 changed files with 101 additions and 17 deletions

View file

@ -24,6 +24,7 @@ struct radv_shader_args;
struct radv_shader_layout;
struct radv_device;
struct radv_graphics_state_key;
struct radv_ps_epilog_key;
bool radv_nir_apply_pipeline_layout(nir_shader *shader, struct radv_device *device,
const struct radv_shader_stage *stage);
@ -78,7 +79,7 @@ bool radv_nir_lower_draw_id_to_zero(nir_shader *shader);
bool radv_nir_remap_color_attachment(nir_shader *shader, const struct radv_graphics_state_key *gfx_state);
bool radv_nir_trim_fs_color_exports(nir_shader *shader, uint32_t colors_needed);
bool radv_nir_trim_fs_color_exports(nir_shader *shader, const struct radv_ps_epilog_key *epilog_key);
bool radv_nir_lower_printf(nir_shader *shader);

View file

@ -8,11 +8,12 @@
#include "nir/nir_builder.h"
#include "radv_constants.h"
#include "radv_nir.h"
#include "radv_shader.h"
static bool
trim_fs_color_exports(nir_builder *b, nir_intrinsic_instr *intrin, void *state)
{
const uint32_t colors_needed = *(uint32_t *)state;
const struct radv_ps_epilog_key *epilog_key = (const struct radv_ps_epilog_key *)state;
if (intrin->intrinsic != nir_intrinsic_store_output)
return false;
@ -24,14 +25,26 @@ trim_fs_color_exports(nir_builder *b, nir_intrinsic_instr *intrin, void *state)
if (index < 0)
return false;
const unsigned needed = (colors_needed >> (index * 4) & 0xf) >> nir_intrinsic_component(intrin);
bool progress = false;
if (epilog_key->no_signed_zero & BITFIELD_BIT(index)) {
nir_io_semantics sem = nir_intrinsic_io_semantics(intrin);
if (!sem.no_signed_zero) {
sem.no_signed_zero = 1;
nir_intrinsic_set_io_semantics(intrin, sem);
progress = true;
}
}
const unsigned needed = (epilog_key->colors_needed >> (index * 4) & 0xf) >> nir_intrinsic_component(intrin);
const unsigned write_mask = nir_intrinsic_write_mask(intrin);
const unsigned new_write_mask = write_mask & needed;
if (new_write_mask == write_mask)
return false;
return progress;
if (!new_write_mask)
nir_instr_remove(&intrin->instr);
@ -42,7 +55,7 @@ trim_fs_color_exports(nir_builder *b, nir_intrinsic_instr *intrin, void *state)
}
bool
radv_nir_trim_fs_color_exports(nir_shader *shader, uint32_t colors_needed)
radv_nir_trim_fs_color_exports(nir_shader *shader, const struct radv_ps_epilog_key *epilog_key)
{
return nir_shader_intrinsics_pass(shader, trim_fs_color_exports, nir_metadata_control_flow, &colors_needed);
return nir_shader_intrinsics_pass(shader, trim_fs_color_exports, nir_metadata_control_flow, (void *)epilog_key);
}

View file

@ -154,6 +154,23 @@ format_is_float32(VkFormat format)
return channel >= 0 && desc->channel[channel].type == UTIL_FORMAT_TYPE_FLOAT && desc->channel[channel].size == 32;
}
/* Tells whether the sign of a zero written to a color attachment of the given
 * format can be ignored.
 *
 * Returns true for R11G11B10_FLOAT and R9G9B9E5_FLOAT, false for any other
 * format that has at least one pure-integer or float channel, and true for
 * every remaining format.
 */
static bool
format_ignores_signed_zero(VkFormat format)
{
   const struct util_format_description *desc = radv_format_description(format);

   switch (desc->format) {
   case PIPE_FORMAT_R11G11B10_FLOAT:
   case PIPE_FORMAT_R9G9B9E5_FLOAT:
      /* Unsigned float formats don't care about signed zeros. */
      return true;
   default:
      break;
   }

   /* One pure-integer or float channel is enough to make the sign of zero matter. */
   bool sign_matters = false;
   for (unsigned chan = 0; chan < desc->nr_channels && !sign_matters; chan++) {
      const bool is_pure_int = desc->channel[chan].pure_integer;
      const bool is_float = desc->channel[chan].type == UTIL_FORMAT_TYPE_FLOAT;

      sign_matters = is_pure_int || is_float;
   }

   return !sign_matters;
}
static bool
radv_pipeline_needs_ps_epilog(const struct vk_graphics_pipeline_state *state,
VkGraphicsPipelineLibraryFlagBitsEXT lib_flags)
@ -1768,7 +1785,7 @@ radv_generate_ps_epilog_key(const struct radv_device *device, const struct radv_
{
const struct radv_physical_device *pdev = radv_device_physical(device);
const struct radv_instance *instance = radv_physical_device_instance(pdev);
unsigned col_format = 0, is_int8 = 0, is_int10 = 0, is_float32 = 0, z_format = 0;
unsigned col_format = 0, is_int8 = 0, is_int10 = 0, is_float32 = 0, z_format = 0, no_signed_zero = 0;
struct radv_ps_epilog_key key;
memset(&key, 0, sizeof(key));
@ -1794,6 +1811,8 @@ radv_generate_ps_epilog_key(const struct radv_device *device, const struct radv_
key.colors_needed |= comp_used << (4 * i);
if (format_ignores_signed_zero(fmt) || blend_enable)
no_signed_zero |= 1 << i;
if (format_is_int8(fmt))
is_int8 |= 1 << i;
if (format_is_int10(fmt))
@ -1822,6 +1841,7 @@ radv_generate_ps_epilog_key(const struct radv_device *device, const struct radv_
col_format |= (col_format & 0xf) << 4;
key.color_map[1] = 1;
key.colors_needed |= (key.colors_needed & 0xf) << 4;
no_signed_zero |= 0x2;
}
z_format = ac_get_spi_shader_z_format(state->export_depth, state->export_stencil, state->export_sample_mask,
@ -1831,6 +1851,7 @@ radv_generate_ps_epilog_key(const struct radv_device *device, const struct radv_
key.color_is_int8 = pdev->info.compiler_info.has_cb_lt16bit_int_clamp_bug ? is_int8 : 0;
key.color_is_int10 = pdev->info.compiler_info.has_cb_lt16bit_int_clamp_bug ? is_int10 : 0;
key.enable_mrt_output_nan_fixup = instance->drirc.debug.enable_mrt_output_nan_fixup ? is_float32 : 0;
key.no_signed_zero = no_signed_zero;
key.colors_written = state->colors_written;
key.mrt0_is_dual_src = state->mrt0_is_dual_src && key.colors_needed & 0xf;
key.export_depth = state->export_depth;
@ -2836,8 +2857,7 @@ radv_graphics_shaders_compile(struct radv_device *device, struct vk_pipeline_cac
/* Lower FS outputs to scalar to allow dce. */
NIR_PASS(_, stages[MESA_SHADER_FRAGMENT].nir, nir_lower_io_to_scalar, nir_var_shader_out, NULL, NULL);
NIR_PASS(update_info, stages[MESA_SHADER_FRAGMENT].nir, radv_nir_trim_fs_color_exports,
gfx_state->ps.epilog.colors_needed);
NIR_PASS(update_info, stages[MESA_SHADER_FRAGMENT].nir, radv_nir_trim_fs_color_exports, &gfx_state->ps.epilog);
NIR_PASS(update_info, stages[MESA_SHADER_FRAGMENT].nir, nir_opt_copy_prop);
NIR_PASS(update_info, stages[MESA_SHADER_FRAGMENT].nir, nir_opt_dce);

View file

@ -104,6 +104,7 @@ struct radv_ps_epilog_key {
uint8_t color_is_int8;
uint8_t color_is_int10;
uint8_t enable_mrt_output_nan_fixup;
uint8_t no_signed_zero;
uint32_t colors_needed;

View file

@ -2054,7 +2054,10 @@ typedef struct nir_io_semantics {
unsigned interp_explicit_strict : 1; /* preserve original vertex order */
/* Skip nir_validate of the intrinsic. Any new code that sets it will be NAK'd. */
unsigned no_validate : 1;
unsigned padding;
/* Start of the second uint. */
unsigned no_signed_zero : 1; /* set when it does not matter whether the input/output is -0.0 or +0.0. */
unsigned padding : 31;
} nir_io_semantics;
/* Transform feedback info for 4 outputs. */

View file

@ -13,9 +13,10 @@
* needed, which is quite common. For example, any float comparison, cosine, exp2, log2,
* or addition with non zero value does not care about the zero sign of the inputs. Neither
* do texture coordinates.
* Drivers can also set no_signed_zero for fragment output stores based on state:
* fixed point or R11G11B10 formats do not care about the sign of zero.
*
* Future work could also consider fragment output state, fixed point or R11G11B10 formats
* do not care about the sign of zero.
* Future work:
* For pre raster stages, position doesn't care, and we could back propagate information from
* the FS for varyings, and interpolated varyings do not care anyway.
*/
@ -214,8 +215,8 @@ prop_tex_fp_math_ctrl(nir_tex_instr *tex)
}
}
static void
prop_intrin_fp_math_ctrl(nir_intrinsic_instr *intrin)
static bool
opt_intrin_fp_math_ctrl(nir_intrinsic_instr *intrin)
{
switch (intrin->intrinsic) {
case nir_intrinsic_ddx:
@ -226,11 +227,37 @@ prop_intrin_fp_math_ctrl(nir_intrinsic_instr *intrin)
case nir_intrinsic_ddy_fine:
if (intrin->instr.pass_flags)
src_mark_preserve_sz(&intrin->src[0], NULL);
break;
return false;
default:
nir_foreach_src(&intrin->instr, src_mark_preserve_sz, NULL);
break;
}
if (!nir_intrinsic_has_io_semantics(intrin)) {
nir_foreach_src(&intrin->instr, src_mark_preserve_sz, NULL);
return false;
}
nir_io_semantics sem = nir_intrinsic_io_semantics(intrin);
const nir_intrinsic_info *info = &nir_intrinsic_infos[(int)intrin->intrinsic];
if (info->has_dest) {
nir_foreach_src(&intrin->instr, src_mark_preserve_sz, NULL);
/* For loads, set no signed zero flag based on gathered info. */
if (!intrin->instr.pass_flags && !sem.no_signed_zero) {
sem.no_signed_zero = 1;
nir_intrinsic_set_io_semantics(intrin, sem);
return true;
}
return false;
} else {
/* For stores, propagate the signed zero information for the data source. */
for (unsigned i = sem.no_signed_zero; i < info->num_srcs; i++)
src_mark_preserve_sz(&intrin->src[i], NULL);
return false;
}
}
static bool
@ -272,7 +299,7 @@ opt_fp_math_ctrl_impl(nir_function_impl *impl)
prop_tex_fp_math_ctrl(nir_instr_as_tex(instr));
break;
case nir_instr_type_intrinsic:
prop_intrin_fp_math_ctrl(nir_instr_as_intrinsic(instr));
progress |= opt_intrin_fp_math_ctrl(nir_instr_as_intrinsic(instr));
break;
case nir_instr_type_phi:
if (!instr->pass_flags)

View file

@ -176,6 +176,18 @@ vectorize_load(nir_intrinsic_instr *chan[8], unsigned start, unsigned count,
nir_io_semantics sem = nir_intrinsic_io_semantics(new_intr);
for (unsigned i = start; i < start + count; i++) {
if (chan[i]) {
if (!nir_intrinsic_io_semantics(chan[i]).no_signed_zero)
sem.no_signed_zero = 0;
}
if (step == merge_low_high_16_to_32 && chan[4 + i]) {
if (!nir_intrinsic_io_semantics(chan[4 + i]).no_signed_zero)
sem.no_signed_zero = 0;
}
}
if (step == vectorize_high_16_separately) {
assert(start >= 4);
sem.high_16bits = 1;
@ -298,6 +310,8 @@ vectorize_store(nir_intrinsic_instr *chan[8], unsigned start, unsigned count,
sem.no_sysval_output = 0;
if (!nir_intrinsic_io_semantics(chan[i]).no_varying)
sem.no_varying = 0;
if (!nir_intrinsic_io_semantics(chan[i]).no_signed_zero)
sem.no_signed_zero = 0;
}
if (step == merge_low_high_16_to_32) {
@ -307,6 +321,8 @@ vectorize_store(nir_intrinsic_instr *chan[8], unsigned start, unsigned count,
sem.no_sysval_output = 0;
if (!nir_intrinsic_io_semantics(chan[4 + i]).no_varying)
sem.no_varying = 0;
if (!nir_intrinsic_io_semantics(chan[4 + i]).no_signed_zero)
sem.no_signed_zero = 0;
}
/* Update the type. */

View file

@ -1601,6 +1601,9 @@ print_intrinsic_instr(nir_intrinsic_instr *instr, print_state *state)
if (io.no_validate)
fprintf(fp, " no_validate");
if (io.no_signed_zero)
fprintf(fp, " no_signed_zero");
break;
}