radv: emit BOP events after every draw to workaround a VRS bug on GFX12

Cc: mesa-stable
Closes: https://gitlab.freedesktop.org/mesa/mesa/-/work_items/14812
Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
(cherry picked from commit bf7e29617d)

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/40752>
This commit is contained in:
Samuel Pitoiset 2026-03-30 18:21:38 +02:00 committed by Eric Engestrom
parent dfd0e55b5a
commit 0a9270779f
4 changed files with 22 additions and 10 deletions

View file

@ -34,7 +34,7 @@
"description": "radv: emit BOP events after every draw to workaround a VRS bug on GFX12",
"nominated": true,
"nomination_type": 1,
"resolution": 0,
"resolution": 1,
"main_sha": null,
"because_sha": null,
"notes": null

View file

@ -1096,6 +1096,13 @@ ac_query_gpu_info(int fd, void *dev_p, struct radeon_info *info,
info->family == CHIP_NAVI22 ||
info->family == CHIP_VANGOGH;
/* GFX12 can hit random GPU hangs when the last VGT stage exports VRS
* rates. The exact conditions that trigger the hang are unclear. One
* possible workaround is to emit BOP events after every draw that exports
* VRS rates.
*/
info->has_vrs_export_bug = info->gfx_level == GFX12;
/* HW bug workaround when CS threadgroups > 256 threads and async compute
* isn't used, i.e. only one compute job can run at a time. If async
* compute is possible, the threadgroup size must be limited to 256 threads

View file

@ -229,6 +229,7 @@ struct radeon_info {
bool has_attr_ring_wait_bug;
bool cp_dma_supports_sparse;
bool has_vrs_ds_export_bug;
bool has_vrs_export_bug;
bool has_taskmesh_indirect0_bug;
bool sdma_supports_sparse; /* Whether SDMA can safely access sparse resources. */
bool sdma_supports_compression; /* Whether SDMA supports DCC and HTILE. */

View file

@ -10385,13 +10385,17 @@ radv_cs_emit_compute_predication(const struct radv_device *device, struct radv_c
}
ALWAYS_INLINE static void
radv_gfx12_emit_hiz_wa(const struct radv_device *device, const struct radv_cmd_state *cmd_state,
struct radv_cmd_stream *cs)
radv_gfx12_emit_wa(const struct radv_device *device, const struct radv_cmd_state *cmd_state, struct radv_cmd_stream *cs)
{
const struct radv_physical_device *pdev = radv_device_physical(device);
const struct radv_rendering_state *render = &cmd_state->render;
const bool hiz_partial_wa_enabled = pdev->gfx12_hiz_wa == RADV_GFX12_HIZ_WA_PARTIAL && render->gfx12_has_hiz;
const bool vrs_export_wa_enabled = pdev->info.has_vrs_export_bug && cmd_state->last_vgt_shader &&
cmd_state->last_vgt_shader->info.outinfo.writes_primitive_shading_rate;
if (pdev->gfx12_hiz_wa == RADV_GFX12_HIZ_WA_PARTIAL && render->gfx12_has_hiz) {
/* Emit BOP events to mitigate some hardware bugs on GFX12. */
if (hiz_partial_wa_enabled || vrs_export_wa_enabled) {
assert(pdev->info.gfx_level == GFX12);
radeon_begin(cs);
radeon_emit(PKT3(PKT3_RELEASE_MEM, 6, 0));
radeon_emit(S_490_EVENT_TYPE(V_028A90_BOTTOM_OF_PIPE_TS) | S_490_EVENT_INDEX(5));
@ -10417,7 +10421,7 @@ radv_cs_emit_draw_packet(struct radv_cmd_buffer *cmd_buffer, uint32_t vertex_cou
radeon_emit(V_0287F0_DI_SRC_SEL_AUTO_INDEX | use_opaque);
radeon_end();
radv_gfx12_emit_hiz_wa(device, &cmd_buffer->state, cs);
radv_gfx12_emit_wa(device, &cmd_buffer->state, cs);
}
/**
@ -10447,7 +10451,7 @@ radv_cs_emit_draw_indexed_packet(struct radv_cmd_buffer *cmd_buffer, uint64_t in
radeon_emit(V_0287F0_DI_SRC_SEL_DMA | S_0287F0_NOT_EOP(not_eop));
radeon_end();
radv_gfx12_emit_hiz_wa(device, &cmd_buffer->state, cs);
radv_gfx12_emit_wa(device, &cmd_buffer->state, cs);
}
/* MUST inline this function to avoid massive perf loss in drawoverhead */
@ -10499,7 +10503,7 @@ radv_cs_emit_indirect_draw_packet(struct radv_cmd_buffer *cmd_buffer, bool index
radeon_end();
radv_gfx12_emit_hiz_wa(device, &cmd_buffer->state, cs);
radv_gfx12_emit_wa(device, &cmd_buffer->state, cs);
cmd_buffer->state.uses_draw_indirect = true;
}
@ -10545,7 +10549,7 @@ radv_cs_emit_indirect_mesh_draw_packet(struct radv_cmd_buffer *cmd_buffer, uint3
radeon_emit(V_0287F0_DI_SRC_SEL_AUTO_INDEX);
radeon_end();
radv_gfx12_emit_hiz_wa(device, &cmd_buffer->state, cs);
radv_gfx12_emit_wa(device, &cmd_buffer->state, cs);
}
ALWAYS_INLINE static void
@ -10629,7 +10633,7 @@ radv_cs_emit_dispatch_taskmesh_gfx_packet(const struct radv_device *device, cons
radeon_emit(V_0287F0_DI_SRC_SEL_AUTO_INDEX);
radeon_end();
radv_gfx12_emit_hiz_wa(device, cmd_state, cs);
radv_gfx12_emit_wa(device, cmd_state, cs);
}
ALWAYS_INLINE static void
@ -10933,7 +10937,7 @@ radv_cs_emit_mesh_dispatch_packet(struct radv_cmd_buffer *cmd_buffer, uint32_t x
radeon_emit(S_0287F0_SOURCE_SELECT(V_0287F0_DI_SRC_SEL_AUTO_INDEX));
radeon_end();
radv_gfx12_emit_hiz_wa(device, &cmd_buffer->state, cs);
radv_gfx12_emit_wa(device, &cmd_buffer->state, cs);
}
ALWAYS_INLINE static void