radv: add a GPU hang workaround for legacy tess+GS for GFX10.3

Ported from RadeonSI ea94cb95e4
("radeonsi/gfx10.3: add a GPU hang workaround for legacy tess+GS")

Fixes: a23802bcb9 ("ac,radeonsi: start adding support for gfx10.3")
Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/28165>
This commit is contained in:
Samuel Pitoiset 2024-03-14 08:43:29 +01:00 committed by Marge Bot
parent 96e9c3fe77
commit 67fd490fe5
3 changed files with 19 additions and 12 deletions

View file

@ -9204,7 +9204,8 @@ radv_emit_graphics_shaders(struct radv_cmd_buffer *cmd_buffer)
radv_get_vgt_shader_key(cmd_buffer->device, cmd_buffer->state.shaders, cmd_buffer->state.gs_copy_shader);
radv_emit_vgt_gs_mode(device, cs, last_vgt_shader);
radv_emit_vgt_vertex_reuse(device, cs, radv_get_shader(cmd_buffer->state.shaders, MESA_SHADER_TESS_EVAL));
radv_emit_vgt_reuse(device, cs, radv_get_shader(cmd_buffer->state.shaders, MESA_SHADER_TESS_EVAL),
&vgt_shader_cfg_key);
radv_emit_vgt_shader_config(device, cs, &vgt_shader_cfg_key);
if (cmd_buffer->device->physical_device->rad_info.gfx_level >= GFX10_3) {

View file

@ -3555,19 +3555,25 @@ radv_emit_fragment_shader(const struct radv_device *device, struct radeon_cmdbuf
}
void
radv_emit_vgt_vertex_reuse(const struct radv_device *device, struct radeon_cmdbuf *ctx_cs,
const struct radv_shader *tes)
radv_emit_vgt_reuse(const struct radv_device *device, struct radeon_cmdbuf *ctx_cs, const struct radv_shader *tes,
const struct radv_vgt_shader_key *key)
{
const struct radv_physical_device *pdevice = device->physical_device;
if (pdevice->rad_info.family < CHIP_POLARIS10 || pdevice->rad_info.gfx_level >= GFX10)
return;
if (pdevice->rad_info.gfx_level == GFX10_3) {
/* Legacy Tess+GS should disable reuse to prevent hangs on GFX10.3. */
const bool has_legacy_tess_gs = key->tess && key->gs && !key->ngg;
unsigned vtx_reuse_depth = 30;
if (tes && tes->info.tes.spacing == TESS_SPACING_FRACTIONAL_ODD) {
vtx_reuse_depth = 14;
radeon_set_context_reg(ctx_cs, R_028AB4_VGT_REUSE_OFF, S_028AB4_REUSE_OFF(has_legacy_tess_gs));
}
if (pdevice->rad_info.family >= CHIP_POLARIS10 && pdevice->rad_info.gfx_level < GFX10) {
unsigned vtx_reuse_depth = 30;
if (tes && tes->info.tes.spacing == TESS_SPACING_FRACTIONAL_ODD) {
vtx_reuse_depth = 14;
}
radeon_set_context_reg(ctx_cs, R_028C58_VGT_VERTEX_REUSE_BLOCK_CNTL, S_028C58_VTX_REUSE_DEPTH(vtx_reuse_depth));
}
radeon_set_context_reg(ctx_cs, R_028C58_VGT_VERTEX_REUSE_BLOCK_CNTL, S_028C58_VTX_REUSE_DEPTH(vtx_reuse_depth));
}
struct radv_vgt_shader_key
@ -3800,7 +3806,7 @@ radv_pipeline_emit_pm4(const struct radv_device *device, struct radv_graphics_pi
radv_emit_ps_inputs(device, ctx_cs, last_vgt_shader, ps);
}
radv_emit_vgt_vertex_reuse(device, ctx_cs, radv_get_shader(pipeline->base.shaders, MESA_SHADER_TESS_EVAL));
radv_emit_vgt_reuse(device, ctx_cs, radv_get_shader(pipeline->base.shaders, MESA_SHADER_TESS_EVAL), &vgt_shader_key);
radv_emit_vgt_shader_config(device, ctx_cs, &vgt_shader_key);
if (pdevice->rad_info.gfx_level >= GFX10_3) {

View file

@ -2546,8 +2546,8 @@ void radv_emit_ps_inputs(const struct radv_device *device, struct radeon_cmdbuf
struct radv_ia_multi_vgt_param_helpers radv_compute_ia_multi_vgt_param(const struct radv_device *device,
struct radv_shader *const *shaders);
void radv_emit_vgt_vertex_reuse(const struct radv_device *device, struct radeon_cmdbuf *ctx_cs,
const struct radv_shader *tes);
void radv_emit_vgt_reuse(const struct radv_device *device, struct radeon_cmdbuf *ctx_cs, const struct radv_shader *tes,
const struct radv_vgt_shader_key *key);
void radv_emit_vgt_gs_out(const struct radv_device *device, struct radeon_cmdbuf *ctx_cs,
uint32_t vgt_gs_out_prim_type);