ac/nir/ngg: Wait for attribute stores before VS/TES/GS pos0 export.

This is a HW bug workaround for some (all?) GFX11 chips.

On these chips, rasterization can start before the attribute ring
stores are finished, which can cause issues.
As a workaround, wait for attribute ring stores to finish
before doing the position export.

Mesh shaders will be taken care of in another commit.

Cc: mesa-stable
Signed-off-by: Timur Kristóf <timur.kristof@gmail.com>
Reviewed-by: Qiang Yu <yuq825@gmail.com>
Reviewed-by: Rhys Perry <pendingchaos02@gmail.com>
(cherry picked from commit edd51655f0)

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/25157>
This commit is contained in:
Timur Kristóf 2023-08-22 21:51:43 +02:00 committed by Dylan Baker
parent 6c14a7b646
commit a8bcf44618
2 changed files with 54 additions and 3 deletions

View file

@ -394,7 +394,7 @@
"description": "ac/nir/ngg: Wait for attribute stores before VS/TES/GS pos0 export.",
"nominated": true,
"nomination_type": 0,
"resolution": 0,
"resolution": 1,
"main_sha": null,
"because_sha": null,
"notes": null

View file

@ -2304,6 +2304,43 @@ export_vertex_params_gfx11(nir_builder *b, nir_ssa_def *export_tid, nir_ssa_def
nir_pop_if(b, NULL);
}
static void
export_pos0_wait_attr_ring(nir_builder *b, nir_if *if_es_thread, nir_def *outputs[VARYING_SLOT_MAX][4], const ac_nir_lower_ngg_options *options)
{
b->cursor = nir_after_cf_node(&if_es_thread->cf_node);
/* Create phi for the position output values. */
vs_output pos_output = {
.slot = VARYING_SLOT_POS,
.chan = {
outputs[VARYING_SLOT_POS][0],
outputs[VARYING_SLOT_POS][1],
outputs[VARYING_SLOT_POS][2],
outputs[VARYING_SLOT_POS][3],
},
};
create_vertex_param_phis(b, 1, &pos_output);
b->cursor = nir_after_cf_list(&b->impl->body);
/* Wait for attribute stores to finish. */
nir_scoped_barrier(b, .execution_scope = SCOPE_SUBGROUP,
.memory_scope = SCOPE_DEVICE,
.memory_semantics = NIR_MEMORY_RELEASE,
.memory_modes = nir_var_mem_ssbo | nir_var_shader_out | nir_var_mem_global | nir_var_image);
/* Export just the pos0 output. */
nir_if *if_export_empty_pos = nir_push_if(b, if_es_thread->condition.ssa);
{
ac_nir_export_position(b, options->gfx_level,
options->clipdist_enable_mask,
!options->has_param_exports,
options->force_vrs, true,
VARYING_BIT_POS, &pos_output.chan);
}
nir_pop_if(b, if_export_empty_pos);
}
void
ac_nir_lower_ngg_nogs(nir_shader *shader, const ac_nir_lower_ngg_options *options)
{
@ -2501,11 +2538,15 @@ ac_nir_lower_ngg_nogs(nir_shader *shader, const ac_nir_lower_ngg_options *option
if (options->kill_pointsize)
export_outputs &= ~VARYING_BIT_PSIZ;
const bool wait_attr_ring = options->gfx_level == GFX11 && options->has_param_exports;
if (wait_attr_ring)
export_outputs &= ~VARYING_BIT_POS;
b->cursor = nir_after_cf_list(&if_es_thread->then_list);
ac_nir_export_position(b, options->gfx_level,
options->clipdist_enable_mask,
!options->has_param_exports,
options->force_vrs, true,
options->force_vrs, !wait_attr_ring,
export_outputs, state.outputs);
if (options->has_param_exports) {
@ -2538,6 +2579,9 @@ ac_nir_lower_ngg_nogs(nir_shader *shader, const ac_nir_lower_ngg_options *option
}
}
if (wait_attr_ring)
export_pos0_wait_attr_ring(b, if_es_thread, state.outputs, options);
nir_metadata_preserve(impl, nir_metadata_none);
nir_validate_shader(shader, "after emitting NGG VS/TES");
@ -3032,10 +3076,14 @@ ngg_gs_export_vertices(nir_builder *b, nir_ssa_def *max_num_out_vtx, nir_ssa_def
if (s->options->kill_pointsize)
export_outputs &= ~VARYING_BIT_PSIZ;
const bool wait_attr_ring = s->options->gfx_level == GFX11 && s->options->has_param_exports;
if (wait_attr_ring)
export_outputs &= ~VARYING_BIT_POS;
ac_nir_export_position(b, s->options->gfx_level,
s->options->clipdist_enable_mask,
!s->options->has_param_exports,
s->options->force_vrs, true,
s->options->force_vrs, !wait_attr_ring,
export_outputs, s->outputs);
nir_pop_if(b, if_vtx_export_thread);
@ -3065,6 +3113,9 @@ ngg_gs_export_vertices(nir_builder *b, nir_ssa_def *max_num_out_vtx, nir_ssa_def
s->outputs_16bit_hi);
}
}
if (wait_attr_ring)
export_pos0_wait_attr_ring(b, if_vtx_export_thread, s->outputs, s->options);
}
static void