diff --git a/src/amd/common/nir/ac_nir_helpers.h b/src/amd/common/nir/ac_nir_helpers.h index cf23f9ac525..916da8c0c1a 100644 --- a/src/amd/common/nir/ac_nir_helpers.h +++ b/src/amd/common/nir/ac_nir_helpers.h @@ -50,6 +50,8 @@ typedef struct uint8_t stream; /* Bitmask of components used: 4 bits per slot, 1 bit per component. */ uint8_t components_mask : 4; + /* Bitmask of components that should be clamped, 1 bit per component. */ + uint8_t clamp_components_mask : 4; /* Bitmask of components that are used as varying, 1 bit per component. */ uint8_t as_varying_mask : 4; /* Bitmask of components that are used as sysval, 1 bit per component. */ diff --git a/src/amd/common/nir/ac_nir_prerast_utils.c b/src/amd/common/nir/ac_nir_prerast_utils.c index a1f884e40cc..b20016b602c 100644 --- a/src/amd/common/nir/ac_nir_prerast_utils.c +++ b/src/amd/common/nir/ac_nir_prerast_utils.c @@ -126,6 +126,9 @@ void ac_nir_gather_prerast_store_output_info(nir_builder *b, nir_intrinsic_instr /* Components of the same output slot may belong to different streams. */ info->stream |= stream << (c * 2); info->components_mask |= BITFIELD_BIT(c); + if (io_sem.clamp) { + info->clamp_components_mask |= BITFIELD_BIT(c); + } if (!io_sem.no_varying) info->as_varying_mask |= BITFIELD_BIT(c); @@ -782,21 +785,27 @@ ac_nir_clamp_vertex_color_outputs(nir_builder *b, ac_nir_prerast_out *out) VARYING_BIT_BFC0 | VARYING_BIT_BFC1))) return; - nir_def *color_channels[16] = {0}; + unsigned i = 0; /* number of entries recorded so far; NOTE(review): the final loop declares its own i that shadows this one */ + nir_def **color_channels[16] = {0}; /* addresses of the out->outputs[slot][comp] entries that get clamped */ + nir_def *color_channels_clamped[16] = {0}; /* nir_fsat() result for the entry at the same index */ nir_if *if_clamp = nir_push_if(b, nir_load_clamp_vertex_color_amd(b)); { - for (unsigned i = 0; i < 16; i++) { - const unsigned slot = (i / 8 ? 
VARYING_SLOT_BFC0 : VARYING_SLOT_COL0) + (i % 8) / 4; - if (out->outputs[slot][i % 4]) - color_channels[i] = nir_fsat(b, out->outputs[slot][i % 4]); + for (unsigned slot = 0; slot < NUM_TOTAL_VARYING_SLOTS; slot++) { + unsigned clamp_mask = out->infos[slot].clamp_components_mask; + u_foreach_bit(comp, clamp_mask) { + assert(i < 16 && comp < 4); /* both arrays above hold 16 entries */ + color_channels[i] = &out->outputs[slot][comp]; + color_channels_clamped[i] = nir_fsat(b, out->outputs[slot][comp]); + i++; + } } } nir_pop_if(b, if_clamp); + for (unsigned i = 0; i < 16; i++) { if (color_channels[i]) { - const unsigned slot = (i / 8 ? VARYING_SLOT_BFC0 : VARYING_SLOT_COL0) + (i % 8) / 4; - out->outputs[slot][i % 4] = nir_if_phi(b, color_channels[i], out->outputs[slot][i % 4]); + *color_channels[i] = nir_if_phi(b, color_channels_clamped[i], *color_channels[i]); } } } diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h index cf3d2a5e20d..e8de9d58704 100644 --- a/src/compiler/nir/nir.h +++ b/src/compiler/nir/nir.h @@ -2118,7 +2118,8 @@ typedef struct nir_io_semantics { /* Start of the second uint. */ unsigned no_signed_zero : 1; /* whether it matters if the input/output is -0.0 or +0.0. */ - unsigned padding : 31; + unsigned clamp : 1; /* whether the IO component should be clamped when GL_CLAMP_VERTEX_COLOR is enabled */ + unsigned padding : 30; } nir_io_semantics; /* Transform feedback info for 4 outputs. */ diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c index 85e7dfa49a2..d0a324fcec3 100644 --- a/src/gallium/drivers/radeonsi/si_shader.c +++ b/src/gallium/drivers/radeonsi/si_shader.c @@ -51,6 +51,12 @@ bool si_is_merged_shader(struct si_shader *shader) return shader->key.ge.as_ngg || si_is_multi_part_shader(shader); } +static bool si_is_color_builtin(unsigned loc) /* true for the COL0/COL1/BFC0/BFC1 varying slots */ +{ + return loc == VARYING_SLOT_COL0 || loc == VARYING_SLOT_COL1 || + loc == VARYING_SLOT_BFC0 || loc == VARYING_SLOT_BFC1; +} + /** * Returns a unique index for a semantic name and index. 
The index must be * less than 64, so that a 64-bit bitmask of used inputs or outputs can be @@ -593,6 +599,30 @@ static void si_assign_param_offsets(nir_shader *nir, struct si_shader *shader, memset(temp_info->vs_output_param_offset, AC_EXP_PARAM_UNDEFINED, sizeof(temp_info->vs_output_param_offset)); + /* Before we mess with io locations, set the clamp flag on every store_output that writes a color builtin (COL0/COL1/BFC0/BFC1). */ + if (nir->info.stage == MESA_SHADER_VERTEX || + nir->info.stage == MESA_SHADER_TESS_EVAL || + nir->info.stage == MESA_SHADER_GEOMETRY) { + nir_function_impl *impl = nir_shader_get_entrypoint(nir); + assert(impl); + nir_foreach_block(block, impl) { + nir_foreach_instr_safe(instr, block) { + if (instr->type != nir_instr_type_intrinsic) + continue; + + nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr); + if (intr->intrinsic != nir_intrinsic_store_output) + continue; + + nir_io_semantics sem = nir_intrinsic_io_semantics(intr); + if (si_is_color_builtin(sem.location)) { + sem.clamp = 1; + nir_intrinsic_set_io_semantics(intr,sem); + } + } + } + } + /* A slot remapping table for duplicated outputs, so that 1 vertex shader output can be * mapped to multiple fragment shader inputs. */