mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-07 04:58:05 +02:00
amd/radeonsi: don't clamp packed user varyings
ac_nir_optimize_outputs() might pack user varyings into the color
built-ins. If this happens, we skip adding clamping to the
components that contain the user varying.
This change also fixes a second bug where a color built-in could be
packed into a non-color slot and would then no longer be clamped.
Fixes: 3777a5d7 ("radeonsi: assign param export indices before compilation")
Closes: #14443
Reviewed-by: Marek Olšák <maraeo@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/40594>
This commit is contained in:
parent
0684976de8
commit
a42c55da46
4 changed files with 50 additions and 8 deletions
|
|
@ -50,6 +50,8 @@ typedef struct
|
|||
uint8_t stream;
|
||||
/* Bitmask of components used: 4 bits per slot, 1 bit per component. */
|
||||
uint8_t components_mask : 4;
|
||||
/* Bitmask of components that should be clamped. */
|
||||
uint8_t clamp_components_mask : 4;
|
||||
/* Bitmask of components that are used as varying, 1 bit per component. */
|
||||
uint8_t as_varying_mask : 4;
|
||||
/* Bitmask of components that are used as sysval, 1 bit per component. */
|
||||
|
|
|
|||
|
|
@ -126,6 +126,9 @@ void ac_nir_gather_prerast_store_output_info(nir_builder *b, nir_intrinsic_instr
|
|||
/* Components of the same output slot may belong to different streams. */
|
||||
info->stream |= stream << (c * 2);
|
||||
info->components_mask |= BITFIELD_BIT(c);
|
||||
if (io_sem.clamp) {
|
||||
info->clamp_components_mask |= BITFIELD_BIT(c);
|
||||
}
|
||||
|
||||
if (!io_sem.no_varying)
|
||||
info->as_varying_mask |= BITFIELD_BIT(c);
|
||||
|
|
@ -782,21 +785,27 @@ ac_nir_clamp_vertex_color_outputs(nir_builder *b, ac_nir_prerast_out *out)
|
|||
VARYING_BIT_BFC0 | VARYING_BIT_BFC1)))
|
||||
return;
|
||||
|
||||
nir_def *color_channels[16] = {0};
|
||||
unsigned i = 0;
|
||||
nir_def **color_channels[16] = {0};
|
||||
nir_def *color_channels_clamped[16] = {0};
|
||||
|
||||
nir_if *if_clamp = nir_push_if(b, nir_load_clamp_vertex_color_amd(b));
|
||||
{
|
||||
for (unsigned i = 0; i < 16; i++) {
|
||||
const unsigned slot = (i / 8 ? VARYING_SLOT_BFC0 : VARYING_SLOT_COL0) + (i % 8) / 4;
|
||||
if (out->outputs[slot][i % 4])
|
||||
color_channels[i] = nir_fsat(b, out->outputs[slot][i % 4]);
|
||||
for (unsigned slot = 0; slot < NUM_TOTAL_VARYING_SLOTS; slot++) {
|
||||
unsigned clamp_mask = out->infos[slot].clamp_components_mask;
|
||||
u_foreach_bit(comp, clamp_mask) {
|
||||
assert(i < 16 && comp < 4);
|
||||
color_channels[i] = &out->outputs[slot][comp];
|
||||
color_channels_clamped[i] = nir_fsat(b, out->outputs[slot][comp]);
|
||||
i++;
|
||||
}
|
||||
}
|
||||
}
|
||||
nir_pop_if(b, if_clamp);
|
||||
|
||||
for (unsigned i = 0; i < 16; i++) {
|
||||
if (color_channels[i]) {
|
||||
const unsigned slot = (i / 8 ? VARYING_SLOT_BFC0 : VARYING_SLOT_COL0) + (i % 8) / 4;
|
||||
out->outputs[slot][i % 4] = nir_if_phi(b, color_channels[i], out->outputs[slot][i % 4]);
|
||||
*color_channels[i] = nir_if_phi(b, color_channels_clamped[i], *color_channels[i]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -2118,7 +2118,8 @@ typedef struct nir_io_semantics {
|
|||
|
||||
/* Start of the second uint. */
|
||||
unsigned no_signed_zero : 1; /* whether it matters if the input/output is -0.0 or +0.0. */
|
||||
unsigned padding : 31;
|
||||
unsigned clamp : 1; /* whether the IO component should be clamped when GL_CLAMP_VERTEX_COLOR is enabled */
|
||||
unsigned padding : 30;
|
||||
} nir_io_semantics;
|
||||
|
||||
/* Transform feedback info for 4 outputs. */
|
||||
|
|
|
|||
|
|
@ -51,6 +51,12 @@ bool si_is_merged_shader(struct si_shader *shader)
|
|||
return shader->key.ge.as_ngg || si_is_multi_part_shader(shader);
|
||||
}
|
||||
|
||||
static bool si_is_color_builtin(unsigned loc)
|
||||
{
|
||||
return loc == VARYING_SLOT_COL0 || loc == VARYING_SLOT_COL1 ||
|
||||
loc == VARYING_SLOT_BFC0 || loc == VARYING_SLOT_BFC1;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns a unique index for a semantic name and index. The index must be
|
||||
* less than 64, so that a 64-bit bitmask of used inputs or outputs can be
|
||||
|
|
@ -593,6 +599,30 @@ static void si_assign_param_offsets(nir_shader *nir, struct si_shader *shader,
|
|||
memset(temp_info->vs_output_param_offset, AC_EXP_PARAM_UNDEFINED,
|
||||
sizeof(temp_info->vs_output_param_offset));
|
||||
|
||||
/* Before we mess with IO locations, set the clamp flag for color built-ins. */
|
||||
if (nir->info.stage == MESA_SHADER_VERTEX ||
|
||||
nir->info.stage == MESA_SHADER_TESS_EVAL ||
|
||||
nir->info.stage == MESA_SHADER_GEOMETRY) {
|
||||
nir_function_impl *impl = nir_shader_get_entrypoint(nir);
|
||||
assert(impl);
|
||||
nir_foreach_block(block, impl) {
|
||||
nir_foreach_instr_safe(instr, block) {
|
||||
if (instr->type != nir_instr_type_intrinsic)
|
||||
continue;
|
||||
|
||||
nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
|
||||
if (intr->intrinsic != nir_intrinsic_store_output)
|
||||
continue;
|
||||
|
||||
nir_io_semantics sem = nir_intrinsic_io_semantics(intr);
|
||||
if (si_is_color_builtin(sem.location)) {
|
||||
sem.clamp = 1;
|
||||
nir_intrinsic_set_io_semantics(intr,sem);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* A slot remapping table for duplicated outputs, so that 1 vertex shader output can be
|
||||
* mapped to multiple fragment shader inputs.
|
||||
*/
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue