mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-01-01 22:40:09 +01:00
ac/nir/ngg: Wait for attribute ring stores in mesh shaders.
Make sure that both per-vertex and per-primitive attribute
ring stores are finished before position or primitive export
instructions are executed.
This is necessary because we need to ensure that mesh shader
waves work correctly when they have either vertex-only or
primitive-only waves.
Cc: mesa-stable
Signed-off-by: Timur Kristóf <timur.kristof@gmail.com>
Reviewed-by: Rhys Perry <pendingchaos02@gmail.com>
(cherry picked from commit 93b4f200de)
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/25157>
This commit is contained in:
parent
e78d9b00bf
commit
f066d9bcd7
2 changed files with 112 additions and 57 deletions
|
|
@ -374,7 +374,7 @@
|
|||
"description": "ac/nir/ngg: Wait for attribute ring stores in mesh shaders.",
|
||||
"nominated": true,
|
||||
"nomination_type": 0,
|
||||
"resolution": 0,
|
||||
"resolution": 1,
|
||||
"main_sha": null,
|
||||
"because_sha": null,
|
||||
"notes": "Applies cleanly, but doesn't build due to needing `nir_def`"
|
||||
|
|
|
|||
|
|
@ -21,6 +21,13 @@
|
|||
VARYING_BIT_VIEWPORT | \
|
||||
VARYING_BIT_PRIMITIVE_SHADING_RATE)
|
||||
|
||||
#define MS_VERT_ARG_EXP_MASK \
|
||||
(VARYING_BIT_CULL_DIST0 | \
|
||||
VARYING_BIT_CULL_DIST1 | \
|
||||
VARYING_BIT_CLIP_DIST0 | \
|
||||
VARYING_BIT_CLIP_DIST1 | \
|
||||
VARYING_BIT_PSIZ)
|
||||
|
||||
enum {
|
||||
nggc_passflag_used_by_pos = 1,
|
||||
nggc_passflag_used_by_other = 2,
|
||||
|
|
@ -4422,69 +4429,117 @@ emit_ms_finale(nir_builder *b, lower_ngg_ms_state *s)
|
|||
* current thread's vertex attributes in a way the HW can export.
|
||||
*/
|
||||
|
||||
/* Export vertices. */
|
||||
nir_ssa_def *has_output_vertex = nir_ilt(b, invocation_index, num_vtx);
|
||||
nir_if *if_has_output_vertex = nir_push_if(b, has_output_vertex);
|
||||
{
|
||||
const uint64_t per_vertex_outputs =
|
||||
s->per_vertex_outputs & ~s->layout.attr_ring.vtx_attr.mask;
|
||||
ms_emit_arrayed_outputs(b, invocation_index, per_vertex_outputs, s);
|
||||
uint64_t per_vertex_outputs =
|
||||
s->per_vertex_outputs & ~s->layout.attr_ring.vtx_attr.mask;
|
||||
uint64_t per_primitive_outputs =
|
||||
s->per_primitive_outputs & ~s->layout.attr_ring.prm_attr.mask & ~SPECIAL_MS_OUT_MASK;
|
||||
|
||||
ac_nir_export_position(b, s->gfx_level, s->clipdist_enable_mask,
|
||||
!s->has_param_exports, false, true,
|
||||
s->per_vertex_outputs | VARYING_BIT_POS, s->outputs);
|
||||
|
||||
/* Export generic attributes on GFX10.3
|
||||
* (On GFX11 they are already stored in the attribute ring.)
|
||||
*/
|
||||
if (s->has_param_exports && s->gfx_level == GFX10_3) {
|
||||
ac_nir_export_parameters(b, s->vs_output_param_offset, per_vertex_outputs, 0, s->outputs,
|
||||
NULL, NULL);
|
||||
}
|
||||
|
||||
const uint64_t per_vertex_special = VARYING_BIT_CULL_DIST0 | VARYING_BIT_CULL_DIST1 |
|
||||
VARYING_BIT_CLIP_DIST0 | VARYING_BIT_CLIP_DIST1 |
|
||||
VARYING_BIT_PSIZ;
|
||||
|
||||
/* GFX11+: also store special outputs to the attribute ring so PS can load them. */
|
||||
if (s->gfx_level >= GFX11 && (per_vertex_outputs & per_vertex_special)) {
|
||||
ms_emit_attribute_ring_output_stores(b, per_vertex_outputs & per_vertex_special, s);
|
||||
}
|
||||
/* Insert layer output store if the pipeline uses multiview but the API shader doesn't write it. */
|
||||
if (s->insert_layer_output) {
|
||||
b->shader->info.outputs_written |= VARYING_BIT_LAYER;
|
||||
b->shader->info.per_primitive_outputs |= VARYING_BIT_LAYER;
|
||||
per_primitive_outputs |= VARYING_BIT_LAYER;
|
||||
}
|
||||
|
||||
const bool has_special_param_exports =
|
||||
(per_vertex_outputs & MS_VERT_ARG_EXP_MASK) ||
|
||||
(per_primitive_outputs & MS_PRIM_ARG_EXP_MASK);
|
||||
|
||||
const bool wait_attr_ring = s->gfx_level == GFX11 && has_special_param_exports;
|
||||
|
||||
/* Export vertices. */
|
||||
if ((per_vertex_outputs & ~VARYING_BIT_POS) || !wait_attr_ring) {
|
||||
nir_ssa_def *has_output_vertex = nir_ilt(b, invocation_index, num_vtx);
|
||||
nir_if *if_has_output_vertex = nir_push_if(b, has_output_vertex);
|
||||
{
|
||||
ms_emit_arrayed_outputs(b, invocation_index, per_vertex_outputs, s);
|
||||
|
||||
if (!wait_attr_ring)
|
||||
ac_nir_export_position(b, s->gfx_level, s->clipdist_enable_mask,
|
||||
!s->has_param_exports, false, true,
|
||||
s->per_vertex_outputs | VARYING_BIT_POS, s->outputs);
|
||||
|
||||
/* Export generic attributes on GFX10.3
|
||||
* (On GFX11 they are already stored in the attribute ring.)
|
||||
*/
|
||||
if (s->has_param_exports && s->gfx_level == GFX10_3) {
|
||||
ac_nir_export_parameters(b, s->vs_output_param_offset, per_vertex_outputs, 0, s->outputs,
|
||||
NULL, NULL);
|
||||
}
|
||||
|
||||
/* GFX11+: also store special outputs to the attribute ring so PS can load them. */
|
||||
if (s->gfx_level >= GFX11 && (per_vertex_outputs & MS_VERT_ARG_EXP_MASK)) {
|
||||
ms_emit_attribute_ring_output_stores(b, per_vertex_outputs & MS_VERT_ARG_EXP_MASK, s);
|
||||
}
|
||||
}
|
||||
nir_pop_if(b, if_has_output_vertex);
|
||||
}
|
||||
nir_pop_if(b, if_has_output_vertex);
|
||||
|
||||
/* Export primitives. */
|
||||
nir_ssa_def *has_output_primitive = nir_ilt(b, invocation_index, num_prm);
|
||||
nir_if *if_has_output_primitive = nir_push_if(b, has_output_primitive);
|
||||
{
|
||||
uint64_t per_primitive_outputs =
|
||||
s->per_primitive_outputs & ~s->layout.attr_ring.prm_attr.mask & ~SPECIAL_MS_OUT_MASK;
|
||||
ms_emit_arrayed_outputs(b, invocation_index, per_primitive_outputs, s);
|
||||
if (per_primitive_outputs || !wait_attr_ring) {
|
||||
nir_ssa_def *has_output_primitive = nir_ilt(b, invocation_index, num_prm);
|
||||
nir_if *if_has_output_primitive = nir_push_if(b, has_output_primitive);
|
||||
{
|
||||
ms_emit_arrayed_outputs(b, invocation_index, per_primitive_outputs, s);
|
||||
|
||||
/* Insert layer output store if the pipeline uses multiview but the API shader doesn't write it. */
|
||||
if (s->insert_layer_output) {
|
||||
s->outputs[VARYING_SLOT_LAYER][0] = nir_load_view_index(b);
|
||||
b->shader->info.outputs_written |= VARYING_BIT_LAYER;
|
||||
b->shader->info.per_primitive_outputs |= VARYING_BIT_LAYER;
|
||||
per_primitive_outputs |= VARYING_BIT_LAYER;
|
||||
}
|
||||
|
||||
ms_emit_primitive_export(b, invocation_index, num_vtx, per_primitive_outputs, s);
|
||||
|
||||
/* Export generic attributes on GFX10.3
|
||||
* (On GFX11 they are already stored in the attribute ring.)
|
||||
*/
|
||||
if (s->has_param_exports && s->gfx_level == GFX10_3) {
|
||||
ac_nir_export_parameters(b, s->vs_output_param_offset, per_primitive_outputs, 0,
|
||||
s->outputs, NULL, NULL);
|
||||
}
|
||||
|
||||
/* GFX11+: also store special per-primitive outputs to the attribute ring so PS can load them. */
|
||||
if (s->gfx_level >= GFX11) {
|
||||
ms_emit_attribute_ring_output_stores(b, per_primitive_outputs & MS_PRIM_ARG_EXP_MASK, s);
|
||||
/* Insert layer output store if the pipeline uses multiview but the API shader doesn't write it. */
|
||||
if (s->insert_layer_output) {
|
||||
s->outputs[VARYING_SLOT_LAYER][0] = nir_load_view_index(b);
|
||||
}
|
||||
|
||||
if (!wait_attr_ring)
|
||||
ms_emit_primitive_export(b, invocation_index, num_vtx, per_primitive_outputs, s);
|
||||
|
||||
/* Export generic attributes on GFX10.3
|
||||
* (On GFX11 they are already stored in the attribute ring.)
|
||||
*/
|
||||
if (s->has_param_exports && s->gfx_level == GFX10_3) {
|
||||
ac_nir_export_parameters(b, s->vs_output_param_offset, per_primitive_outputs, 0,
|
||||
s->outputs, NULL, NULL);
|
||||
}
|
||||
|
||||
/* GFX11+: also store special per-primitive outputs to the attribute ring so PS can load them. */
|
||||
if (s->gfx_level >= GFX11) {
|
||||
ms_emit_attribute_ring_output_stores(b, per_primitive_outputs & MS_PRIM_ARG_EXP_MASK, s);
|
||||
}
|
||||
}
|
||||
nir_pop_if(b, if_has_output_primitive);
|
||||
}
|
||||
|
||||
/* When we need to wait for attribute ring stores, we emit both position and primitive
|
||||
* export instructions after a barrier to make sure both per-vertex and per-primitive
|
||||
* attribute ring stores are finished before the GPU starts rasterization.
|
||||
*/
|
||||
if (wait_attr_ring) {
|
||||
/* Wait for attribute stores to finish. */
|
||||
nir_scoped_barrier(b, .execution_scope = SCOPE_SUBGROUP,
|
||||
.memory_scope = SCOPE_DEVICE,
|
||||
.memory_semantics = NIR_MEMORY_RELEASE,
|
||||
.memory_modes = nir_var_shader_out);
|
||||
|
||||
/* Position export only */
|
||||
nir_ssa_def *has_output_vertex = nir_ilt(b, invocation_index, num_vtx);
|
||||
nir_if *if_has_output_vertex = nir_push_if(b, has_output_vertex);
|
||||
{
|
||||
ms_emit_arrayed_outputs(b, invocation_index, per_vertex_outputs, s);
|
||||
ac_nir_export_position(b, s->gfx_level, s->clipdist_enable_mask,
|
||||
!s->has_param_exports, false, true,
|
||||
s->per_vertex_outputs | VARYING_BIT_POS, s->outputs);
|
||||
}
|
||||
nir_pop_if(b, if_has_output_vertex);
|
||||
|
||||
nir_ssa_def *has_output_primitive = nir_ilt(b, invocation_index, num_prm);
|
||||
nir_if *if_has_output_primitive = nir_push_if(b, has_output_primitive);
|
||||
{
|
||||
ms_emit_arrayed_outputs(b, invocation_index, per_primitive_outputs, s);
|
||||
if (s->insert_layer_output) {
|
||||
s->outputs[VARYING_SLOT_LAYER][0] = nir_load_view_index(b);
|
||||
}
|
||||
|
||||
ms_emit_primitive_export(b, invocation_index, num_vtx, per_primitive_outputs, s);
|
||||
}
|
||||
nir_pop_if(b, if_has_output_primitive);
|
||||
}
|
||||
nir_pop_if(b, if_has_output_primitive);
|
||||
}
|
||||
|
||||
static void
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue