radv: Reorder potentially per-primitive FS builtins.

There are some FS built-ins that can be per-vertex or
per-primitive depending on whether a mesh shader is used:
primitive ID (implicit in VS), layer and viewport.

However, the HW requires per-primitive FS inputs to be ordered last.
This causes bugs when the same unlinked FS is used together
with VS/TES/GS and MS (with unlinked ESO or fast-linked GPL).

To solve this problem, we reorder the FS inputs so that these
potentially per-primitive inputs go after per-vertex inputs but
before per-primitive inputs.

Signed-off-by: Timur Kristóf <timur.kristof@gmail.com>
Reviewed-by: Georg Lehmann <dadschoorse@gmail.com>
Reviewed-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/32220>
This commit is contained in:
Timur Kristóf 2024-11-19 14:49:17 +01:00 committed by Marge Bot
parent f4a3ba5302
commit 930243bf36
4 changed files with 93 additions and 22 deletions

View file

@ -63,6 +63,8 @@ void radv_nir_lower_io_to_scalar_early(nir_shader *nir, nir_variable_mode mask);
unsigned radv_map_io_driver_location(unsigned semantic);
bool radv_recompute_fs_input_bases(nir_shader *nir);
void radv_nir_lower_io(struct radv_device *device, nir_shader *nir);
bool radv_nir_lower_io_to_mem(struct radv_device *device, struct radv_shader_stage *stage);

View file

@ -53,6 +53,78 @@ radv_nir_lower_io_to_scalar_early(nir_shader *nir, nir_variable_mode mask)
}
}
/* State shared with radv_recompute_fs_input_bases_callback.
 *
 * The three 64-bit masks are indexed by the I/O semantic location of each
 * FS input that the shader reads. Every read input is expected to fall into
 * exactly one of them. The two counters cache the popcount of the first two
 * masks so that the callback can compute where each later group starts.
 */
typedef struct {
/* Read inputs that are neither marked per-primitive nor one of
 * PRIMITIVE_ID / LAYER / VIEWPORT.
 */
uint64_t always_per_vertex;
/* Read inputs among PRIMITIVE_ID / LAYER / VIEWPORT. */
uint64_t potentially_per_primitive;
/* Read inputs that are marked per-primitive, excluding
 * PRIMITIVE_ID / LAYER / VIEWPORT.
 */
uint64_t always_per_primitive;
/* Number of bits set in always_per_vertex. */
unsigned num_always_per_vertex;
/* Number of bits set in potentially_per_primitive. */
unsigned num_potentially_per_primitive;
} radv_recompute_fs_input_bases_state;
/* Per-intrinsic callback that assigns a new base to one FS input load.
 *
 * Bases are handed out in three consecutive groups:
 *   1. always per-vertex inputs,
 *   2. potentially per-primitive inputs,
 *   3. always per-primitive inputs.
 * Within a group, an input's index is the number of inputs of that group
 * whose location is lower than its own.
 *
 * `data` points to a radv_recompute_fs_input_bases_state.
 * Returns true only when the intrinsic's base was actually changed.
 */
static bool
radv_recompute_fs_input_bases_callback(UNUSED nir_builder *b, nir_intrinsic_instr *intrin, void *data)
{
   const radv_recompute_fs_input_bases_state *state = data;

   /* Only the intrinsics that can load an FS input are of interest. */
   const bool loads_fs_input = intrin->intrinsic == nir_intrinsic_load_input ||
                               intrin->intrinsic == nir_intrinsic_load_per_primitive_input ||
                               intrin->intrinsic == nir_intrinsic_load_interpolated_input ||
                               intrin->intrinsic == nir_intrinsic_load_input_vertex;
   if (!loads_fs_input)
      return false;

   const nir_io_semantics sem = nir_intrinsic_io_semantics(intrin);
   const uint64_t this_slot = BITFIELD64_BIT(sem.location);
   /* Every location strictly below this one. */
   const uint64_t lower_slots = BITFIELD64_MASK(sem.location);

   unsigned computed_base = 0;

   if (state->always_per_vertex & this_slot) {
      /* First group: starts at zero. */
      computed_base = util_bitcount64(lower_slots & state->always_per_vertex);
   } else if (state->potentially_per_primitive & this_slot) {
      /* Second group: placed right after all always per-vertex inputs. */
      computed_base = state->num_always_per_vertex +
                      util_bitcount64(lower_slots & state->potentially_per_primitive);
   } else if (state->always_per_primitive & this_slot) {
      /* Third group: placed after both preceding groups. */
      computed_base = state->num_always_per_vertex + state->num_potentially_per_primitive +
                      util_bitcount64(lower_slots & state->always_per_primitive);
   } else {
      unreachable("invalid FS input");
   }

   /* Report progress only if something was really rewritten. */
   if (nir_intrinsic_base(intrin) == computed_base)
      return false;

   nir_intrinsic_set_base(intrin, computed_base);
   return true;
}
/* Recompute the base of every FS input load in `nir`.
 *
 * The inputs read by the shader are split into three groups (always
 * per-vertex, potentially per-primitive, always per-primitive) and the
 * intrinsics are renumbered so the groups follow each other in that order.
 * PRIMITIVE_ID, LAYER and VIEWPORT always land in the middle group,
 * regardless of whether they are currently marked per-primitive.
 *
 * Returns true if any intrinsic base was modified.
 */
bool
radv_recompute_fs_input_bases(nir_shader *nir)
{
   /* Built-ins that may be either per-vertex or per-primitive. */
   const uint64_t flexible_builtins = VARYING_BIT_PRIMITIVE_ID | VARYING_BIT_LAYER | VARYING_BIT_VIEWPORT;

   const uint64_t read_mask = nir->info.inputs_read;
   const uint64_t per_prim_mask = nir->info.per_primitive_inputs;

   radv_recompute_fs_input_bases_state s;
   s.always_per_vertex = read_mask & ~per_prim_mask & ~flexible_builtins;
   s.potentially_per_primitive = read_mask & flexible_builtins;
   s.always_per_primitive = read_mask & per_prim_mask & ~flexible_builtins;
   s.num_always_per_vertex = util_bitcount64(s.always_per_vertex);
   s.num_potentially_per_primitive = util_bitcount64(s.potentially_per_primitive);

   return nir_shader_intrinsics_pass(nir, radv_recompute_fs_input_bases_callback, nir_metadata_control_flow, &s);
}
void
radv_nir_lower_io(struct radv_device *device, nir_shader *nir)
{
@ -83,10 +155,10 @@ radv_nir_lower_io(struct radv_device *device, nir_shader *nir)
}
if (nir->info.stage == MESA_SHADER_FRAGMENT) {
/* Recompute FS input intrinsic bases to make sure that there are no gaps
* between the FS input slots.
/* Recompute FS input intrinsic bases to assign a location to each FS input.
* The computed base will match the index of each input in SPI_PS_INPUT_CNTL_n.
*/
nir_recompute_io_bases(nir, nir_var_shader_in);
radv_recompute_fs_input_bases(nir);
}
NIR_PASS_V(nir, nir_opt_dce);

View file

@ -2573,17 +2573,6 @@ radv_emit_ps_inputs(struct radv_cmd_buffer *cmd_buffer)
uint32_t ps_input_cntl[32];
unsigned ps_offset = 0;
if (!mesh) {
if (ps->info.ps.prim_id_input)
slot_to_ps_input(outinfo, VARYING_SLOT_PRIMITIVE_ID, ps_input_cntl, &ps_offset, false, radv_ps_in_flat);
if (ps->info.ps.layer_input)
slot_to_ps_input(outinfo, VARYING_SLOT_LAYER, ps_input_cntl, &ps_offset, true, radv_ps_in_flat);
if (ps->info.ps.viewport_index_input)
slot_to_ps_input(outinfo, VARYING_SLOT_VIEWPORT, ps_input_cntl, &ps_offset, true, radv_ps_in_flat);
}
if (ps->info.ps.has_pcoord)
ps_input_cntl[ps_offset++] = S_028644_PT_SPRITE_TEX(1) | S_028644_OFFSET(0x20);
@ -2595,8 +2584,17 @@ radv_emit_ps_inputs(struct radv_cmd_buffer *cmd_buffer)
input_mask_to_ps_inputs(outinfo, ps, ps->info.ps.input_mask, ps_input_cntl, &ps_offset, radv_ps_in_flat);
/* Per-primitive PS inputs: the HW needs these to be last. */
if (mesh) {
/* Potentially per-primitive PS inputs */
if (!mesh) {
if (ps->info.ps.prim_id_input)
slot_to_ps_input(outinfo, VARYING_SLOT_PRIMITIVE_ID, ps_input_cntl, &ps_offset, false, radv_ps_in_flat);
if (ps->info.ps.layer_input)
slot_to_ps_input(outinfo, VARYING_SLOT_LAYER, ps_input_cntl, &ps_offset, true, radv_ps_in_flat);
if (ps->info.ps.viewport_index_input)
slot_to_ps_input(outinfo, VARYING_SLOT_VIEWPORT, ps_input_cntl, &ps_offset, true, radv_ps_in_flat);
} else {
if (ps->info.ps.prim_id_input)
slot_to_ps_input(outinfo, VARYING_SLOT_PRIMITIVE_ID, ps_input_cntl, &ps_offset, false, per_prim);
@ -2607,6 +2605,7 @@ radv_emit_ps_inputs(struct radv_cmd_buffer *cmd_buffer)
slot_to_ps_input(outinfo, VARYING_SLOT_VIEWPORT, ps_input_cntl, &ps_offset, true, per_prim);
}
/* Per-primitive PS inputs: the HW needs these to be last. */
input_mask_to_ps_inputs(outinfo, ps, ps->info.ps.input_per_primitive_mask, ps_input_cntl, &ps_offset, per_prim);
if (pdev->info.gfx_level >= GFX12) {

View file

@ -1535,18 +1535,16 @@ radv_graphics_shaders_link_varyings_second(struct radv_shader_stage *producer_st
NIR_PASS(_, consumer, nir_opt_vectorize_io, nir_var_shader_in);
}
/* Recompute driver locations of PS inputs
* because the backend compiler relies on their driver locations.
*/
if (consumer->info.stage == MESA_SHADER_FRAGMENT)
nir_recompute_io_bases(consumer, nir_var_shader_in);
/* Gather shader info; at least the I/O info likely changed
* and changes to only the I/O info are not reflected in nir_opt_varyings_progress.
*/
nir_shader_gather_info(producer, nir_shader_get_entrypoint(producer));
nir_shader_gather_info(consumer, nir_shader_get_entrypoint(consumer));
/* Recompute intrinsic bases of PS inputs in order to remove gaps. */
if (consumer->info.stage == MESA_SHADER_FRAGMENT)
radv_recompute_fs_input_bases(consumer);
/* Recreate XFB info from intrinsics (nir_opt_varyings may have changed it). */
if (producer->xfb_info) {
nir_gather_xfb_info_from_intrinsics(producer);