diff --git a/src/amd/common/ac_gpu_info.c b/src/amd/common/ac_gpu_info.c index 69e9b233c27..d0a1747edbf 100644 --- a/src/amd/common/ac_gpu_info.c +++ b/src/amd/common/ac_gpu_info.c @@ -1302,6 +1302,17 @@ bool ac_query_gpu_info(int fd, void *dev_p, struct radeon_info *info, info->has_export_conflict_bug = info->gfx_level == GFX11; + /* The hw starts culling after all exports are finished, + * not when all waves in an NGG workgroup are finished, + * and if all primitives are culled, the hw deallocates the attribute ring + * for the NGG workgroup and reuses it for the next one while the previous NGG + * workgroup might still be issuing attribute stores. + * When there are 2 NGG workgroups in the system with the same attribute ring address, + * attributes may be corrupted. + * The workaround is to issue and wait for attribute stores before the last export. + */ + info->has_attr_ring_wait_bug = info->gfx_level == GFX11 || info->gfx_level == GFX11_5; + /* When LLVM is fixed to handle multiparts shaders, this value will depend * on the known good versions of LLVM. Until then, enable the equivalent WA * in the nir -> llvm backend. diff --git a/src/amd/common/ac_gpu_info.h b/src/amd/common/ac_gpu_info.h index 742f1ee00b7..603a4d83757 100644 --- a/src/amd/common/ac_gpu_info.h +++ b/src/amd/common/ac_gpu_info.h @@ -116,6 +116,7 @@ struct radeon_info { bool never_send_perfcounter_stop; bool discardable_allows_big_page; bool has_export_conflict_bug; + bool has_attr_ring_wait_bug; bool has_vrs_ds_export_bug; bool has_taskmesh_indirect0_bug; bool sdma_supports_sparse; /* Whether SDMA can safely access sparse resources. 
*/ diff --git a/src/amd/common/nir/ac_nir_lower_ngg.c b/src/amd/common/nir/ac_nir_lower_ngg.c index a9b1b0e5d15..55c209a14aa 100644 --- a/src/amd/common/nir/ac_nir_lower_ngg.c +++ b/src/amd/common/nir/ac_nir_lower_ngg.c @@ -6,6 +6,7 @@ #include "ac_nir.h" #include "ac_nir_helpers.h" +#include "ac_gpu_info.h" #include "amdgfxregs.h" #include "nir_builder.h" #include "nir_xfb_info.h" @@ -1655,11 +1656,6 @@ ngg_nogs_gather_outputs(nir_builder *b, struct exec_list *cf_list, lower_ngg_nog } } -static bool must_wait_attr_ring(enum amd_gfx_level gfx_level, bool has_param_exports) -{ - return (gfx_level == GFX11 || gfx_level == GFX11_5) && has_param_exports; -} - static void export_pos0_wait_attr_ring(nir_builder *b, nir_if *if_es_thread, nir_def *outputs[VARYING_SLOT_MAX][4], const ac_nir_lower_ngg_options *options) { @@ -1707,7 +1703,7 @@ ac_nir_lower_ngg_nogs(nir_shader *shader, const ac_nir_lower_ngg_options *option options->can_cull ? nir_local_variable_create(impl, glsl_bool_type(), "gs_accepted") : NULL; nir_variable *gs_exported_var = nir_local_variable_create(impl, glsl_bool_type(), "gs_exported"); - const bool wait_attr_ring = must_wait_attr_ring(options->gfx_level, options->has_param_exports); + const bool wait_attr_ring = options->has_param_exports && options->hw_info->has_attr_ring_wait_bug; bool streamout_enabled = shader->xfb_info && !options->disable_streamout; bool has_user_edgeflags = options->use_edgeflags && (shader->info.outputs_written & VARYING_BIT_EDGE); @@ -2408,7 +2404,7 @@ ngg_gs_export_vertices(nir_builder *b, nir_def *max_num_out_vtx, nir_def *tid_in if (s->options->kill_layer) export_outputs &= ~VARYING_BIT_LAYER; - const bool wait_attr_ring = must_wait_attr_ring(s->options->gfx_level, s->options->has_param_exports); + const bool wait_attr_ring = s->options->has_param_exports && s->options->hw_info->has_attr_ring_wait_bug; if (wait_attr_ring) export_outputs &= ~VARYING_BIT_POS; diff --git a/src/amd/vulkan/winsys/null/radv_null_winsys.c 
b/src/amd/vulkan/winsys/null/radv_null_winsys.c index 4028d397c6e..378a3b5afe8 100644 --- a/src/amd/vulkan/winsys/null/radv_null_winsys.c +++ b/src/amd/vulkan/winsys/null/radv_null_winsys.c @@ -117,6 +117,7 @@ radv_null_winsys_query_info(struct radeon_winsys *rws, struct radeon_info *gpu_i gpu_info->has_3d_cube_border_color_mipmap = true; gpu_info->has_image_opcodes = true; + gpu_info->has_attr_ring_wait_bug = gpu_info->gfx_level == GFX11 || gpu_info->gfx_level == GFX11_5; if (gpu_info->family == CHIP_NAVI31 || gpu_info->family == CHIP_NAVI32) gpu_info->num_physical_wave64_vgprs_per_simd = 768;