ac/nir/ngg: Add and use a has_attr_ring_wait_bug field to ac_gpu_info.

And apply the attribute ring wait workaround based on the new field.

Reviewed-by: Marek Olšák <marek.olsak@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/33218>
This commit is contained in:
Timur Kristóf 2025-01-27 12:09:37 +01:00 committed by Marge Bot
parent e76361d626
commit b163ce51b1
4 changed files with 16 additions and 7 deletions

View file

@ -1302,6 +1302,17 @@ bool ac_query_gpu_info(int fd, void *dev_p, struct radeon_info *info,
info->has_export_conflict_bug = info->gfx_level == GFX11;
/* The hw starts culling after all exports are finished,
* not when all waves in an NGG workgroup are finished,
* and if all primitives are culled, the hw deallocates the attribute ring
* for the NGG workgroup and reuses it for the next one while the previous NGG
* workgroup might still be issuing attribute stores.
* When there are 2 NGG workgroups in the system with the same attribute ring address,
* attributes may be corrupted.
* The workaround is to issue and wait for attribute stores before the last export.
*/
info->has_attr_ring_wait_bug = info->gfx_level == GFX11 || info->gfx_level == GFX11_5;
/* When LLVM is fixed to handle multi-part shaders, this value will depend
* on the known good versions of LLVM. Until then, enable the equivalent WA
* in the nir -> llvm backend.

View file

@ -116,6 +116,7 @@ struct radeon_info {
bool never_send_perfcounter_stop;
bool discardable_allows_big_page;
bool has_export_conflict_bug;
bool has_attr_ring_wait_bug;
bool has_vrs_ds_export_bug;
bool has_taskmesh_indirect0_bug;
bool sdma_supports_sparse; /* Whether SDMA can safely access sparse resources. */

View file

@ -6,6 +6,7 @@
#include "ac_nir.h"
#include "ac_nir_helpers.h"
#include "ac_gpu_info.h"
#include "amdgfxregs.h"
#include "nir_builder.h"
#include "nir_xfb_info.h"
@ -1655,11 +1656,6 @@ ngg_nogs_gather_outputs(nir_builder *b, struct exec_list *cf_list, lower_ngg_nog
}
}
/* Returns true only when the shader has parameter exports and the GPU
 * generation is GFX11 or GFX11.5; callers use the result to decide whether
 * the attribute ring wait path is taken.
 */
static bool must_wait_attr_ring(enum amd_gfx_level gfx_level, bool has_param_exports)
{
   /* Without parameter exports the answer is false on every generation. */
   if (!has_param_exports)
      return false;

   return gfx_level == GFX11 || gfx_level == GFX11_5;
}
static void
export_pos0_wait_attr_ring(nir_builder *b, nir_if *if_es_thread, nir_def *outputs[VARYING_SLOT_MAX][4], const ac_nir_lower_ngg_options *options)
{
@ -1707,7 +1703,7 @@ ac_nir_lower_ngg_nogs(nir_shader *shader, const ac_nir_lower_ngg_options *option
options->can_cull ? nir_local_variable_create(impl, glsl_bool_type(), "gs_accepted") : NULL;
nir_variable *gs_exported_var = nir_local_variable_create(impl, glsl_bool_type(), "gs_exported");
const bool wait_attr_ring = must_wait_attr_ring(options->gfx_level, options->has_param_exports);
const bool wait_attr_ring = options->has_param_exports && options->hw_info->has_attr_ring_wait_bug;
bool streamout_enabled = shader->xfb_info && !options->disable_streamout;
bool has_user_edgeflags =
options->use_edgeflags && (shader->info.outputs_written & VARYING_BIT_EDGE);
@ -2408,7 +2404,7 @@ ngg_gs_export_vertices(nir_builder *b, nir_def *max_num_out_vtx, nir_def *tid_in
if (s->options->kill_layer)
export_outputs &= ~VARYING_BIT_LAYER;
const bool wait_attr_ring = must_wait_attr_ring(s->options->gfx_level, s->options->has_param_exports);
const bool wait_attr_ring = s->options->has_param_exports && s->options->hw_info->has_attr_ring_wait_bug;
if (wait_attr_ring)
export_outputs &= ~VARYING_BIT_POS;

View file

@ -117,6 +117,7 @@ radv_null_winsys_query_info(struct radeon_winsys *rws, struct radeon_info *gpu_i
gpu_info->has_3d_cube_border_color_mipmap = true;
gpu_info->has_image_opcodes = true;
gpu_info->has_attr_ring_wait_bug = gpu_info->gfx_level == GFX11 || gpu_info->gfx_level == GFX11_5;
if (gpu_info->family == CHIP_NAVI31 || gpu_info->family == CHIP_NAVI32)
gpu_info->num_physical_wave64_vgprs_per_simd = 768;