diff --git a/.pick_status.json b/.pick_status.json index f5e58d057ef..848b82bd205 100644 --- a/.pick_status.json +++ b/.pick_status.json @@ -34,7 +34,7 @@ "description": "radv: emit BOP events after every draw to workaround a VRS bug on GFX12", "nominated": true, "nomination_type": 1, - "resolution": 0, + "resolution": 1, "main_sha": null, "because_sha": null, "notes": null diff --git a/src/amd/common/ac_gpu_info.c b/src/amd/common/ac_gpu_info.c index 45411369624..b5d748716b7 100644 --- a/src/amd/common/ac_gpu_info.c +++ b/src/amd/common/ac_gpu_info.c @@ -1096,6 +1096,13 @@ ac_query_gpu_info(int fd, void *dev_p, struct radeon_info *info, info->family == CHIP_NAVI22 || info->family == CHIP_VANGOGH; + /* GFX12 is affected by random GPU hangs when VRS rates are exported by the + * last VGT stage under some conditions that are unclear. One possible + * workaround is to emit BOP events after every draw that exports VRS + * rates. + */ + info->has_vrs_export_bug = info->gfx_level == GFX12; + /* HW bug workaround when CS threadgroups > 256 threads and async compute * isn't used, i.e. only one compute job can run at a time. If async * compute is possible, the threadgroup size must be limited to 256 threads diff --git a/src/amd/common/ac_gpu_info.h b/src/amd/common/ac_gpu_info.h index ff8231facb5..2ed915bbdc4 100644 --- a/src/amd/common/ac_gpu_info.h +++ b/src/amd/common/ac_gpu_info.h @@ -229,6 +229,7 @@ struct radeon_info { bool has_attr_ring_wait_bug; bool cp_dma_supports_sparse; bool has_vrs_ds_export_bug; + bool has_vrs_export_bug; bool has_taskmesh_indirect0_bug; bool sdma_supports_sparse; /* Whether SDMA can safely access sparse resources. */ bool sdma_supports_compression; /* Whether SDMA supports DCC and HTILE. */ diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c index f0f1f247a09..579397b3c45 100644 --- a/src/amd/vulkan/radv_cmd_buffer.c +++ b/src/amd/vulkan/radv_cmd_buffer.c @@ -10385,13 +10385,17 @@ radv_cs_emit_compute_predication(const struct radv_device *device, struct radv_c } ALWAYS_INLINE static void -radv_gfx12_emit_hiz_wa(const struct radv_device *device, const struct radv_cmd_state *cmd_state, - struct radv_cmd_stream *cs) +radv_gfx12_emit_wa(const struct radv_device *device, const struct radv_cmd_state *cmd_state, struct radv_cmd_stream *cs) { const struct radv_physical_device *pdev = radv_device_physical(device); const struct radv_rendering_state *render = &cmd_state->render; + const bool hiz_partial_wa_enabled = pdev->gfx12_hiz_wa == RADV_GFX12_HIZ_WA_PARTIAL && render->gfx12_has_hiz; + const bool vrs_export_wa_enabled = pdev->info.has_vrs_export_bug && cmd_state->last_vgt_shader && + cmd_state->last_vgt_shader->info.outinfo.writes_primitive_shading_rate; - if (pdev->gfx12_hiz_wa == RADV_GFX12_HIZ_WA_PARTIAL && render->gfx12_has_hiz) { + /* Emit BOP events to mitigate some hardware bugs on GFX12. */ + if (hiz_partial_wa_enabled || vrs_export_wa_enabled) { + assert(pdev->info.gfx_level == GFX12); radeon_begin(cs); radeon_emit(PKT3(PKT3_RELEASE_MEM, 6, 0)); radeon_emit(S_490_EVENT_TYPE(V_028A90_BOTTOM_OF_PIPE_TS) | S_490_EVENT_INDEX(5)); @@ -10417,7 +10421,7 @@ radv_cs_emit_draw_packet(struct radv_cmd_buffer *cmd_buffer, uint32_t vertex_cou radeon_emit(V_0287F0_DI_SRC_SEL_AUTO_INDEX | use_opaque); radeon_end(); - radv_gfx12_emit_hiz_wa(device, &cmd_buffer->state, cs); + radv_gfx12_emit_wa(device, &cmd_buffer->state, cs); } /** @@ -10447,7 +10451,7 @@ radv_cs_emit_draw_indexed_packet(struct radv_cmd_buffer *cmd_buffer, uint64_t in radeon_emit(V_0287F0_DI_SRC_SEL_DMA | S_0287F0_NOT_EOP(not_eop)); radeon_end(); - radv_gfx12_emit_hiz_wa(device, &cmd_buffer->state, cs); + radv_gfx12_emit_wa(device, &cmd_buffer->state, cs); } /* MUST inline this function to avoid massive perf loss in drawoverhead */ @@ -10499,7 +10503,7 @@ radv_cs_emit_indirect_draw_packet(struct radv_cmd_buffer *cmd_buffer, bool index radeon_end(); - radv_gfx12_emit_hiz_wa(device, &cmd_buffer->state, cs); + radv_gfx12_emit_wa(device, &cmd_buffer->state, cs); cmd_buffer->state.uses_draw_indirect = true; } @@ -10545,7 +10549,7 @@ radv_cs_emit_indirect_mesh_draw_packet(struct radv_cmd_buffer *cmd_buffer, uint3 radeon_emit(V_0287F0_DI_SRC_SEL_AUTO_INDEX); radeon_end(); - radv_gfx12_emit_hiz_wa(device, &cmd_buffer->state, cs); + radv_gfx12_emit_wa(device, &cmd_buffer->state, cs); } ALWAYS_INLINE static void @@ -10629,7 +10633,7 @@ radv_cs_emit_dispatch_taskmesh_gfx_packet(const struct radv_device *device, cons radeon_emit(V_0287F0_DI_SRC_SEL_AUTO_INDEX); radeon_end(); - radv_gfx12_emit_hiz_wa(device, cmd_state, cs); + radv_gfx12_emit_wa(device, cmd_state, cs); } ALWAYS_INLINE static void @@ -10933,7 +10937,7 @@ radv_cs_emit_mesh_dispatch_packet(struct radv_cmd_buffer *cmd_buffer, uint32_t x radeon_emit(S_0287F0_SOURCE_SELECT(V_0287F0_DI_SRC_SEL_AUTO_INDEX)); radeon_end(); - radv_gfx12_emit_hiz_wa(device, &cmd_buffer->state, cs); + radv_gfx12_emit_wa(device, &cmd_buffer->state, cs); } ALWAYS_INLINE static void