amd/radeonsi: don't clamp packed user varyings

ac_nir_optimize_outputs() might pack user varyings into the color
built-ins. When this happens, we skip clamping the components that
contain a user varying.

This change also fixes a second bug where a color built-in could be
packed into a non-color slot and would then no longer be clamped.

Fixes: 3777a5d7 ("radeonsi: assign param export indices before compilation")
Closes: #14443

Reviewed-by: Marek Olšák <maraeo@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/40594>
This commit is contained in:
Timothy Arceri 2026-04-24 11:49:34 +10:00 committed by Marge Bot
parent 0684976de8
commit a42c55da46
4 changed files with 50 additions and 8 deletions

View file

@ -50,6 +50,8 @@ typedef struct
uint8_t stream;
/* Bitmask of components used: 4 bits per slot, 1 bit per component. */
uint8_t components_mask : 4;
/* Bitmask of components that should be clamped. */
uint8_t clamp_components_mask : 4;
/* Bitmask of components that are used as varying, 1 bit per component. */
uint8_t as_varying_mask : 4;
/* Bitmask of components that are used as sysval, 1 bit per component. */

View file

@ -126,6 +126,9 @@ void ac_nir_gather_prerast_store_output_info(nir_builder *b, nir_intrinsic_instr
/* Components of the same output slot may belong to different streams. */
info->stream |= stream << (c * 2);
info->components_mask |= BITFIELD_BIT(c);
if (io_sem.clamp) {
info->clamp_components_mask |= BITFIELD_BIT(c);
}
if (!io_sem.no_varying)
info->as_varying_mask |= BITFIELD_BIT(c);
@ -782,21 +785,27 @@ ac_nir_clamp_vertex_color_outputs(nir_builder *b, ac_nir_prerast_out *out)
VARYING_BIT_BFC0 | VARYING_BIT_BFC1)))
return;
nir_def *color_channels[16] = {0};
unsigned i = 0;
nir_def **color_channels[16] = {0};
nir_def *color_channels_clamped[16] = {0};
nir_if *if_clamp = nir_push_if(b, nir_load_clamp_vertex_color_amd(b));
{
for (unsigned i = 0; i < 16; i++) {
const unsigned slot = (i / 8 ? VARYING_SLOT_BFC0 : VARYING_SLOT_COL0) + (i % 8) / 4;
if (out->outputs[slot][i % 4])
color_channels[i] = nir_fsat(b, out->outputs[slot][i % 4]);
for (unsigned slot = 0; slot < NUM_TOTAL_VARYING_SLOTS; slot++) {
unsigned clamp_mask = out->infos[slot].clamp_components_mask;
u_foreach_bit(comp, clamp_mask) {
assert(i < 16 && comp < 4);
color_channels[i] = &out->outputs[slot][comp];
color_channels_clamped[i] = nir_fsat(b, out->outputs[slot][comp]);
i++;
}
}
}
nir_pop_if(b, if_clamp);
for (unsigned i = 0; i < 16; i++) {
if (color_channels[i]) {
const unsigned slot = (i / 8 ? VARYING_SLOT_BFC0 : VARYING_SLOT_COL0) + (i % 8) / 4;
out->outputs[slot][i % 4] = nir_if_phi(b, color_channels[i], out->outputs[slot][i % 4]);
*color_channels[i] = nir_if_phi(b, color_channels_clamped[i], *color_channels[i]);
}
}
}

View file

@ -2118,7 +2118,8 @@ typedef struct nir_io_semantics {
/* Start of the second uint. */
unsigned no_signed_zero : 1; /* whether it matters if the input/output is -0.0 or +0.0. */
unsigned padding : 31;
unsigned clamp : 1; /* whether the IO component should be clamped when GL_CLAMP_VERTEX_COLOR is enabled */
unsigned padding : 30;
} nir_io_semantics;
/* Transform feedback info for 4 outputs. */

View file

@ -51,6 +51,12 @@ bool si_is_merged_shader(struct si_shader *shader)
return shader->key.ge.as_ngg || si_is_multi_part_shader(shader);
}
/**
 * Tell whether a varying slot location is one of the color built-ins.
 *
 * \param loc  varying slot location to test
 * \return true when loc is VARYING_SLOT_COL0, VARYING_SLOT_COL1,
 *         VARYING_SLOT_BFC0 or VARYING_SLOT_BFC1; false otherwise
 */
static bool si_is_color_builtin(unsigned loc)
{
   switch (loc) {
   case VARYING_SLOT_COL0:
   case VARYING_SLOT_COL1:
   case VARYING_SLOT_BFC0:
   case VARYING_SLOT_BFC1:
      return true;
   default:
      return false;
   }
}
/**
* Returns a unique index for a semantic name and index. The index must be
* less than 64, so that a 64-bit bitmask of used inputs or outputs can be
@ -593,6 +599,30 @@ static void si_assign_param_offsets(nir_shader *nir, struct si_shader *shader,
memset(temp_info->vs_output_param_offset, AC_EXP_PARAM_UNDEFINED,
sizeof(temp_info->vs_output_param_offset));
/* Before we mess with IO locations, set the clamp flag on color built-ins. */
if (nir->info.stage == MESA_SHADER_VERTEX ||
nir->info.stage == MESA_SHADER_TESS_EVAL ||
nir->info.stage == MESA_SHADER_GEOMETRY) {
nir_function_impl *impl = nir_shader_get_entrypoint(nir);
assert(impl);
nir_foreach_block(block, impl) {
nir_foreach_instr_safe(instr, block) {
if (instr->type != nir_instr_type_intrinsic)
continue;
nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
if (intr->intrinsic != nir_intrinsic_store_output)
continue;
nir_io_semantics sem = nir_intrinsic_io_semantics(intr);
if (si_is_color_builtin(sem.location)) {
sem.clamp = 1;
nir_intrinsic_set_io_semantics(intr,sem);
}
}
}
}
/* A slot remapping table for duplicated outputs, so that 1 vertex shader output can be
* mapped to multiple fragment shader inputs.
*/