mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-01-02 09:30:11 +01:00
radv: Reorder potentially per-primitive FS builtins.
There are some FS built-ins that can be per-vertex or per-primitive depending on whether a mesh shader is used: primitive ID (implicit in VS), layer and viewport. However, the HW requires per-primitive FS inputs to be ordered last. This causes bugs when the same unlinked FS is used together with VS/TES/GS and MS (with unlinked ESO or fast-linked GPL). To solve this problem, we reorder the FS inputs so that these potentially per-primitive inputs go after per-vertex inputs but before per-primitive inputs. Signed-off-by: Timur Kristóf <timur.kristof@gmail.com> Reviewed-by: Georg Lehmann <dadschoorse@gmail.com> Reviewed-by: Samuel Pitoiset <samuel.pitoiset@gmail.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/32220>
This commit is contained in:
parent
f4a3ba5302
commit
930243bf36
4 changed files with 93 additions and 22 deletions
|
|
@ -63,6 +63,8 @@ void radv_nir_lower_io_to_scalar_early(nir_shader *nir, nir_variable_mode mask);
|
|||
|
||||
unsigned radv_map_io_driver_location(unsigned semantic);
|
||||
|
||||
/* Reassign the intrinsic base of every FS input load so that always per-vertex
 * inputs come first, followed by the potentially per-primitive built-ins
 * (primitive ID, layer, viewport), followed by always per-primitive inputs.
 * Returns the progress value reported by the underlying intrinsics pass.
 */
bool radv_recompute_fs_input_bases(nir_shader *nir);
|
||||
|
||||
void radv_nir_lower_io(struct radv_device *device, nir_shader *nir);
|
||||
|
||||
bool radv_nir_lower_io_to_mem(struct radv_device *device, struct radv_shader_stage *stage);
|
||||
|
|
|
|||
|
|
@ -53,6 +53,78 @@ radv_nir_lower_io_to_scalar_early(nir_shader *nir, nir_variable_mode mask)
|
|||
}
|
||||
}
|
||||
|
||||
/**
 * State shared with radv_recompute_fs_input_bases_callback.
 *
 * The three masks are indexed by FS input location (one bit per location) and
 * partition the inputs read by the shader into three ordered groups.
 */
typedef struct {
   /* Inputs that are neither per-primitive nor one of the built-ins below. */
   uint64_t always_per_vertex;

   /* Primitive ID, layer and viewport: per-vertex or per-primitive depending
    * on whether a mesh shader is used.
    */
   uint64_t potentially_per_primitive;

   /* Per-primitive inputs other than the built-ins above. */
   uint64_t always_per_primitive;

   /* Number of bits set in always_per_vertex. */
   unsigned num_always_per_vertex;

   /* Number of bits set in potentially_per_primitive. */
   unsigned num_potentially_per_primitive;
} radv_recompute_fs_input_bases_state;
|
||||
|
||||
static bool
|
||||
radv_recompute_fs_input_bases_callback(UNUSED nir_builder *b, nir_intrinsic_instr *intrin, void *data)
|
||||
{
|
||||
const radv_recompute_fs_input_bases_state *s = (const radv_recompute_fs_input_bases_state *)data;
|
||||
|
||||
/* Filter possible FS input intrinsics */
|
||||
switch (intrin->intrinsic) {
|
||||
case nir_intrinsic_load_input:
|
||||
case nir_intrinsic_load_per_primitive_input:
|
||||
case nir_intrinsic_load_interpolated_input:
|
||||
case nir_intrinsic_load_input_vertex:
|
||||
break;
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
|
||||
const nir_io_semantics sem = nir_intrinsic_io_semantics(intrin);
|
||||
const uint64_t location_bit = BITFIELD64_BIT(sem.location);
|
||||
const uint64_t location_mask = BITFIELD64_MASK(sem.location);
|
||||
const unsigned old_base = nir_intrinsic_base(intrin);
|
||||
unsigned new_base = 0;
|
||||
|
||||
if (location_bit & s->always_per_vertex) {
|
||||
new_base = util_bitcount64(s->always_per_vertex & location_mask);
|
||||
} else if (location_bit & s->potentially_per_primitive) {
|
||||
new_base = s->num_always_per_vertex + util_bitcount64(s->potentially_per_primitive & location_mask);
|
||||
} else if (location_bit & s->always_per_primitive) {
|
||||
new_base = s->num_always_per_vertex + s->num_potentially_per_primitive +
|
||||
util_bitcount64(s->always_per_primitive & location_mask);
|
||||
} else {
|
||||
unreachable("invalid FS input");
|
||||
}
|
||||
|
||||
if (new_base != old_base) {
|
||||
nir_intrinsic_set_base(intrin, new_base);
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
bool
|
||||
radv_recompute_fs_input_bases(nir_shader *nir)
|
||||
{
|
||||
const uint64_t always_per_vertex = nir->info.inputs_read & ~nir->info.per_primitive_inputs &
|
||||
~(VARYING_BIT_PRIMITIVE_ID | VARYING_BIT_LAYER | VARYING_BIT_VIEWPORT);
|
||||
|
||||
const uint64_t potentially_per_primitive =
|
||||
nir->info.inputs_read & (VARYING_BIT_PRIMITIVE_ID | VARYING_BIT_LAYER | VARYING_BIT_VIEWPORT);
|
||||
|
||||
const uint64_t always_per_primitive = nir->info.inputs_read & nir->info.per_primitive_inputs &
|
||||
~(VARYING_BIT_PRIMITIVE_ID | VARYING_BIT_LAYER | VARYING_BIT_VIEWPORT);
|
||||
|
||||
radv_recompute_fs_input_bases_state s = {
|
||||
.always_per_vertex = always_per_vertex,
|
||||
.potentially_per_primitive = potentially_per_primitive,
|
||||
.always_per_primitive = always_per_primitive,
|
||||
.num_always_per_vertex = util_bitcount64(always_per_vertex),
|
||||
.num_potentially_per_primitive = util_bitcount64(potentially_per_primitive),
|
||||
};
|
||||
|
||||
return nir_shader_intrinsics_pass(nir, radv_recompute_fs_input_bases_callback, nir_metadata_control_flow, &s);
|
||||
}
|
||||
|
||||
void
|
||||
radv_nir_lower_io(struct radv_device *device, nir_shader *nir)
|
||||
{
|
||||
|
|
@ -83,10 +155,10 @@ radv_nir_lower_io(struct radv_device *device, nir_shader *nir)
|
|||
}
|
||||
|
||||
if (nir->info.stage == MESA_SHADER_FRAGMENT) {
|
||||
/* Recompute FS input intrinsic bases to make sure that there are no gaps
|
||||
* between the FS input slots.
|
||||
/* Recompute FS input intrinsic bases to assign a location to each FS input.
|
||||
* The computed base will match the index of each input in SPI_PS_INPUT_CNTL_n.
|
||||
*/
|
||||
nir_recompute_io_bases(nir, nir_var_shader_in);
|
||||
radv_recompute_fs_input_bases(nir);
|
||||
}
|
||||
|
||||
NIR_PASS_V(nir, nir_opt_dce);
|
||||
|
|
|
|||
|
|
@ -2573,17 +2573,6 @@ radv_emit_ps_inputs(struct radv_cmd_buffer *cmd_buffer)
|
|||
uint32_t ps_input_cntl[32];
|
||||
unsigned ps_offset = 0;
|
||||
|
||||
if (!mesh) {
|
||||
if (ps->info.ps.prim_id_input)
|
||||
slot_to_ps_input(outinfo, VARYING_SLOT_PRIMITIVE_ID, ps_input_cntl, &ps_offset, false, radv_ps_in_flat);
|
||||
|
||||
if (ps->info.ps.layer_input)
|
||||
slot_to_ps_input(outinfo, VARYING_SLOT_LAYER, ps_input_cntl, &ps_offset, true, radv_ps_in_flat);
|
||||
|
||||
if (ps->info.ps.viewport_index_input)
|
||||
slot_to_ps_input(outinfo, VARYING_SLOT_VIEWPORT, ps_input_cntl, &ps_offset, true, radv_ps_in_flat);
|
||||
}
|
||||
|
||||
if (ps->info.ps.has_pcoord)
|
||||
ps_input_cntl[ps_offset++] = S_028644_PT_SPRITE_TEX(1) | S_028644_OFFSET(0x20);
|
||||
|
||||
|
|
@ -2595,8 +2584,17 @@ radv_emit_ps_inputs(struct radv_cmd_buffer *cmd_buffer)
|
|||
|
||||
input_mask_to_ps_inputs(outinfo, ps, ps->info.ps.input_mask, ps_input_cntl, &ps_offset, radv_ps_in_flat);
|
||||
|
||||
/* Per-primitive PS inputs: the HW needs these to be last. */
|
||||
if (mesh) {
|
||||
/* Potentially per-primitive PS inputs */
|
||||
if (!mesh) {
|
||||
if (ps->info.ps.prim_id_input)
|
||||
slot_to_ps_input(outinfo, VARYING_SLOT_PRIMITIVE_ID, ps_input_cntl, &ps_offset, false, radv_ps_in_flat);
|
||||
|
||||
if (ps->info.ps.layer_input)
|
||||
slot_to_ps_input(outinfo, VARYING_SLOT_LAYER, ps_input_cntl, &ps_offset, true, radv_ps_in_flat);
|
||||
|
||||
if (ps->info.ps.viewport_index_input)
|
||||
slot_to_ps_input(outinfo, VARYING_SLOT_VIEWPORT, ps_input_cntl, &ps_offset, true, radv_ps_in_flat);
|
||||
} else {
|
||||
if (ps->info.ps.prim_id_input)
|
||||
slot_to_ps_input(outinfo, VARYING_SLOT_PRIMITIVE_ID, ps_input_cntl, &ps_offset, false, per_prim);
|
||||
|
||||
|
|
@ -2607,6 +2605,7 @@ radv_emit_ps_inputs(struct radv_cmd_buffer *cmd_buffer)
|
|||
slot_to_ps_input(outinfo, VARYING_SLOT_VIEWPORT, ps_input_cntl, &ps_offset, true, per_prim);
|
||||
}
|
||||
|
||||
/* Per-primitive PS inputs: the HW needs these to be last. */
|
||||
input_mask_to_ps_inputs(outinfo, ps, ps->info.ps.input_per_primitive_mask, ps_input_cntl, &ps_offset, per_prim);
|
||||
|
||||
if (pdev->info.gfx_level >= GFX12) {
|
||||
|
|
|
|||
|
|
@ -1535,18 +1535,16 @@ radv_graphics_shaders_link_varyings_second(struct radv_shader_stage *producer_st
|
|||
NIR_PASS(_, consumer, nir_opt_vectorize_io, nir_var_shader_in);
|
||||
}
|
||||
|
||||
/* Recompute driver locations of PS inputs
|
||||
* because the backend compiler relies on their driver locations.
|
||||
*/
|
||||
if (consumer->info.stage == MESA_SHADER_FRAGMENT)
|
||||
nir_recompute_io_bases(consumer, nir_var_shader_in);
|
||||
|
||||
/* Gather shader info; at least the I/O info likely changed
|
||||
* and changes to only the I/O info are not reflected in nir_opt_varyings_progress.
|
||||
*/
|
||||
nir_shader_gather_info(producer, nir_shader_get_entrypoint(producer));
|
||||
nir_shader_gather_info(consumer, nir_shader_get_entrypoint(consumer));
|
||||
|
||||
/* Recompute intrinsic bases of PS inputs in order to remove gaps. */
|
||||
if (consumer->info.stage == MESA_SHADER_FRAGMENT)
|
||||
radv_recompute_fs_input_bases(consumer);
|
||||
|
||||
/* Recreate XFB info from intrinsics (nir_opt_varyings may have changed it). */
|
||||
if (producer->xfb_info) {
|
||||
nir_gather_xfb_info_from_intrinsics(producer);
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue