ac: move has_cs_regalloc_hang_bug to ac_compiler_info

Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/41022>
This commit is contained in:
Rhys Perry 2026-04-24 13:31:20 +01:00 committed by Marge Bot
parent e40457b136
commit 5ee0935861
7 changed files with 17 additions and 17 deletions

View file

@ -401,6 +401,16 @@ ac_fill_compiler_info(struct radeon_info *info, const struct drm_amdgpu_info_dev
out->has_attr_ring_wait_bug = info->gfx_level >= GFX11 && info->gfx_level < GFX12;
out->has_primid_instancing_bug = info->gfx_level == GFX6 && info->max_se == 1;
/* HW bug workaround when CS threadgroups > 256 threads and async compute
* isn't used, i.e. only one compute job can run at a time. If async
* compute is possible, the threadgroup size must be limited to 256 threads
* on all queues to avoid the bug.
* Only GFX6 and certain GFX7 chips are affected.
*/
out->has_cs_regalloc_hang_bug = info->gfx_level == GFX6 ||
info->family == CHIP_BONAIRE ||
info->family == CHIP_KABINI;
}
void
@ -948,16 +958,6 @@ void ac_fill_bug_info(struct radeon_info *info)
*/
info->has_vrs_export_bug = info->gfx_level == GFX12;
/* HW bug workaround when CS threadgroups > 256 threads and async compute
* isn't used, i.e. only one compute job can run at a time. If async
* compute is possible, the threadgroup size must be limited to 256 threads
* on all queues to avoid the bug.
* Only GFX6 and certain GFX7 chips are affected.
*/
info->has_cs_regalloc_hang_bug = info->gfx_level == GFX6 ||
info->family == CHIP_BONAIRE ||
info->family == CHIP_KABINI;
/* HW bug workaround with async compute dispatches when threadgroup > 4096.
* The workaround is to change the "threadgroup" dimension mode to "thread"
* dimension mode.
@ -2078,6 +2078,7 @@ void ac_print_gpu_info(FILE *f, const struct radeon_info *info, int fd)
fprintf(f, " has_ngg_fully_culled_bug = %i\n", info->compiler_info.has_ngg_fully_culled_bug);
fprintf(f, " has_attr_ring_wait_bug = %i\n", info->compiler_info.has_attr_ring_wait_bug);
fprintf(f, " has_primid_instancing_bug = %i\n", info->compiler_info.has_primid_instancing_bug);
fprintf(f, " has_cs_regalloc_hang_bug = %i\n", info->compiler_info.has_cs_regalloc_hang_bug);
fprintf(f, "Ring info:\n");
if (info->gfx_level >= GFX11) {

View file

@ -198,8 +198,10 @@ struct ac_compiler_info {
uint32_t has_attr_ring_wait_bug : 1;
/* GFX6: limit TCS workgroup to one patch if primitive ID is used. */
uint32_t has_primid_instancing_bug : 1;
/* GFX6 and certain GFX7 chips: bug with compute workgroups larger 256 invocations. */
uint32_t has_cs_regalloc_hang_bug : 1;
uint32_t reserved : 5;
uint32_t reserved : 4;
};
struct radeon_info {
@ -269,7 +271,6 @@ struct radeon_info {
bool has_two_planes_iterate256_bug;
bool has_vgt_flush_ngg_legacy_bug;
bool has_prim_restart_sync_bug;
bool has_cs_regalloc_hang_bug;
bool has_async_compute_threadgroup_bug;
bool has_async_compute_align32_bug;
bool has_32bit_predication;

View file

@ -1146,7 +1146,6 @@ radv_device_init_compiler_info(struct radv_device *device)
.family = pdev->info.family,
.address32_hi = pdev->info.address32_hi,
.rbplus_allowed = pdev->info.rbplus_allowed,
.has_cs_regalloc_hang_bug = pdev->info.has_cs_regalloc_hang_bug,
},
/* Debug/tracing */
.debug =

View file

@ -860,7 +860,7 @@ radv_shader_spirv_to_nir(const struct radv_compiler_info *compiler_info, struct
if (progress)
nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir));
if (compiler_info->hw.has_cs_regalloc_hang_bug && mesa_shader_stage_is_compute(nir->info.stage)) {
if (compiler_info->ac->has_cs_regalloc_hang_bug && mesa_shader_stage_is_compute(nir->info.stage)) {
const uint32_t wg_size = nir->info.workgroup_size[0] *
nir->info.workgroup_size[1] *
nir->info.workgroup_size[2];

View file

@ -518,7 +518,6 @@ struct radv_compiler_info {
uint32_t family;
uint32_t address32_hi;
bool rbplus_allowed;
bool has_cs_regalloc_hang_bug;
} hw;
/* Debug/tracing */

View file

@ -413,7 +413,7 @@ void si_init_compute_caps(struct si_screen *sscreen)
caps->subgroup_sizes = sscreen->info.gfx_level < GFX10 ? 64 : 64 | 32;
caps->max_variable_threads_per_block =
sscreen->info.has_cs_regalloc_hang_bug ? 256 : SI_MAX_VARIABLE_THREADS_PER_BLOCK;
sscreen->info.compiler_info.has_cs_regalloc_hang_bug ? 256 : SI_MAX_VARIABLE_THREADS_PER_BLOCK;
}
static void si_init_mesh_caps(struct si_screen *sscreen)

View file

@ -695,7 +695,7 @@ static void si_preprocess_nir(struct si_nir_shader_ctx *ctx)
}
if (mesa_shader_stage_is_compute(nir->info.stage)) {
if (sel->screen->info.has_cs_regalloc_hang_bug) {
if (sel->screen->info.compiler_info.has_cs_regalloc_hang_bug) {
const uint32_t wg_size = nir->info.workgroup_size[0] *
nir->info.workgroup_size[1] *
nir->info.workgroup_size[2];