diff --git a/src/amd/common/ac_gpu_info.c b/src/amd/common/ac_gpu_info.c index 9378c77f424..1ec34eb127d 100644 --- a/src/amd/common/ac_gpu_info.c +++ b/src/amd/common/ac_gpu_info.c @@ -401,6 +401,16 @@ ac_fill_compiler_info(struct radeon_info *info, const struct drm_amdgpu_info_dev out->has_attr_ring_wait_bug = info->gfx_level >= GFX11 && info->gfx_level < GFX12; out->has_primid_instancing_bug = info->gfx_level == GFX6 && info->max_se == 1; + + /* HW bug workaround when CS threadgroups > 256 threads and async compute + * isn't used, i.e. only one compute job can run at a time. If async + * compute is possible, the threadgroup size must be limited to 256 threads + * on all queues to avoid the bug. + * Only GFX6 and certain GFX7 chips are affected. + */ + out->has_cs_regalloc_hang_bug = info->gfx_level == GFX6 || + info->family == CHIP_BONAIRE || + info->family == CHIP_KABINI; } void @@ -948,16 +958,6 @@ void ac_fill_bug_info(struct radeon_info *info) */ info->has_vrs_export_bug = info->gfx_level == GFX12; - /* HW bug workaround when CS threadgroups > 256 threads and async compute - * isn't used, i.e. only one compute job can run at a time. If async - * compute is possible, the threadgroup size must be limited to 256 threads - * on all queues to avoid the bug. - * Only GFX6 and certain GFX7 chips are affected. - */ - info->has_cs_regalloc_hang_bug = info->gfx_level == GFX6 || - info->family == CHIP_BONAIRE || - info->family == CHIP_KABINI; - /* HW bug workaround with async compute dispatches when threadgroup > 4096. * The workaround is to change the "threadgroup" dimension mode to "thread" * dimension mode. @@ -2078,6 +2078,7 @@ void ac_print_gpu_info(FILE *f, const struct radeon_info *info, int fd) fprintf(f, " has_ngg_fully_culled_bug = %i\n", info->compiler_info.has_ngg_fully_culled_bug); fprintf(f, " has_attr_ring_wait_bug = %i\n", info->compiler_info.has_attr_ring_wait_bug); fprintf(f, " has_primid_instancing_bug = %i\n", info->compiler_info.has_primid_instancing_bug); + fprintf(f, " has_cs_regalloc_hang_bug = %i\n", info->compiler_info.has_cs_regalloc_hang_bug); fprintf(f, "Ring info:\n"); if (info->gfx_level >= GFX11) { diff --git a/src/amd/common/ac_gpu_info.h b/src/amd/common/ac_gpu_info.h index dd5769e2b37..90cab9908d3 100644 --- a/src/amd/common/ac_gpu_info.h +++ b/src/amd/common/ac_gpu_info.h @@ -198,8 +198,10 @@ struct ac_compiler_info { uint32_t has_attr_ring_wait_bug : 1; /* GFX6: limit TCS workgroup to one patch if primitive ID is used. */ uint32_t has_primid_instancing_bug : 1; + /* GFX6 and certain GFX7 chips: bug with compute workgroups larger 256 invocations. */ + uint32_t has_cs_regalloc_hang_bug : 1; - uint32_t reserved : 5; + uint32_t reserved : 4; }; struct radeon_info { @@ -269,7 +271,6 @@ struct radeon_info { bool has_two_planes_iterate256_bug; bool has_vgt_flush_ngg_legacy_bug; bool has_prim_restart_sync_bug; - bool has_cs_regalloc_hang_bug; bool has_async_compute_threadgroup_bug; bool has_async_compute_align32_bug; bool has_32bit_predication; diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c index 3bca428221c..826b505e949 100644 --- a/src/amd/vulkan/radv_device.c +++ b/src/amd/vulkan/radv_device.c @@ -1146,7 +1146,6 @@ radv_device_init_compiler_info(struct radv_device *device) .family = pdev->info.family, .address32_hi = pdev->info.address32_hi, .rbplus_allowed = pdev->info.rbplus_allowed, - .has_cs_regalloc_hang_bug = pdev->info.has_cs_regalloc_hang_bug, }, /* Debug/tracing */ .debug = diff --git a/src/amd/vulkan/radv_shader.c b/src/amd/vulkan/radv_shader.c index 87da690b16c..c0922b45c85 100644 --- a/src/amd/vulkan/radv_shader.c +++ b/src/amd/vulkan/radv_shader.c @@ -860,7 +860,7 @@ radv_shader_spirv_to_nir(const struct radv_compiler_info *compiler_info, struct if (progress) nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir)); - if (compiler_info->hw.has_cs_regalloc_hang_bug && mesa_shader_stage_is_compute(nir->info.stage)) { + if (compiler_info->ac->has_cs_regalloc_hang_bug && mesa_shader_stage_is_compute(nir->info.stage)) { const uint32_t wg_size = nir->info.workgroup_size[0] * nir->info.workgroup_size[1] * nir->info.workgroup_size[2]; diff --git a/src/amd/vulkan/radv_shader.h b/src/amd/vulkan/radv_shader.h index 1ec01650d6a..a0dd8ffe20d 100644 --- a/src/amd/vulkan/radv_shader.h +++ b/src/amd/vulkan/radv_shader.h @@ -518,7 +518,6 @@ struct radv_compiler_info { uint32_t family; uint32_t address32_hi; bool rbplus_allowed; - bool has_cs_regalloc_hang_bug; } hw; /* Debug/tracing */ diff --git a/src/gallium/drivers/radeonsi/si_get.c b/src/gallium/drivers/radeonsi/si_get.c index 7e450f0f33b..1f875fa7d8c 100644 --- a/src/gallium/drivers/radeonsi/si_get.c +++ b/src/gallium/drivers/radeonsi/si_get.c @@ -413,7 +413,7 @@ void si_init_compute_caps(struct si_screen *sscreen) caps->subgroup_sizes = sscreen->info.gfx_level < GFX10 ? 64 : 64 | 32; caps->max_variable_threads_per_block = - sscreen->info.has_cs_regalloc_hang_bug ? 256 : SI_MAX_VARIABLE_THREADS_PER_BLOCK; + sscreen->info.compiler_info.has_cs_regalloc_hang_bug ? 256 : SI_MAX_VARIABLE_THREADS_PER_BLOCK; } static void si_init_mesh_caps(struct si_screen *sscreen) diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c index bc80c982cd8..1dececdf870 100644 --- a/src/gallium/drivers/radeonsi/si_shader.c +++ b/src/gallium/drivers/radeonsi/si_shader.c @@ -695,7 +695,7 @@ static void si_preprocess_nir(struct si_nir_shader_ctx *ctx) } if (mesa_shader_stage_is_compute(nir->info.stage)) { - if (sel->screen->info.has_cs_regalloc_hang_bug) { + if (sel->screen->info.compiler_info.has_cs_regalloc_hang_bug) { const uint32_t wg_size = nir->info.workgroup_size[0] * nir->info.workgroup_size[1] * nir->info.workgroup_size[2];