From 3ca2f71f3d7eebfaca3d48cdc840c5127cf90d48 Mon Sep 17 00:00:00 2001 From: Samuel Pitoiset Date: Tue, 13 May 2025 20:20:05 +0200 Subject: [PATCH] radv: fix conditional rendering with DGC and non native 32-bit predicate When the hardware doesn't natively support 32-bit predication, the driver has a fallback which allocates a 64-bit predicate in the upload BO and copies the original value into it. This breaks when conditional rendering is enabled in the stateCommandBuffer, which is used by preprocess() and in which execute() is also recorded: if the preprocess() is recorded in a different cmdbuf which is submitted before the cmdbuf that contains execute(), the fallback (i.e. alloc + COPY_DATA) will be performed after preprocess() has already run. This would cause the predicate value to always be 0. To fix that, keep track of the user predication VA, which is the only VA that needs to be used by DGC because it reads 32-bit from the shader. This fixes a very weird corner case with vkd3d-proton. Cc: mesa-stable Closes: https://gitlab.freedesktop.org/mesa/mesa/-/issues/13143 Signed-off-by: Samuel Pitoiset Part-of: --- src/amd/vulkan/meta/radv_meta_fast_clear.c | 6 ++++- src/amd/vulkan/radv_cmd_buffer.c | 26 +++++++++++++--------- src/amd/vulkan/radv_cmd_buffer.h | 3 ++- src/amd/vulkan/radv_dgc.c | 2 +- 4 files changed, 23 insertions(+), 14 deletions(-) diff --git a/src/amd/vulkan/meta/radv_meta_fast_clear.c b/src/amd/vulkan/meta/radv_meta_fast_clear.c index 7fb26cd3b9d..1214267ec6b 100644 --- a/src/amd/vulkan/meta/radv_meta_fast_clear.c +++ b/src/amd/vulkan/meta/radv_meta_fast_clear.c @@ -310,6 +310,7 @@ radv_process_color_image(struct radv_cmd_buffer *cmd_buffer, struct radv_image * const VkImageSubresourceRange *subresourceRange, enum radv_color_op op) { struct radv_device *device = radv_cmd_buffer_device(cmd_buffer); + const struct radv_physical_device *pdev = radv_device_physical(device); struct radv_meta_saved_state saved_state; bool old_predicating = false; uint64_t pred_offset; @@ 
-396,8 +397,11 @@ radv_process_color_image(struct radv_cmd_buffer *cmd_buffer, struct radv_image * if (cmd_buffer->state.predication_type != -1) { /* Restore previous conditional rendering user state. */ + const uint64_t pred_va = pdev->info.has_32bit_predication ? cmd_buffer->state.user_predication_va + : cmd_buffer->state.emulated_predication_va; + radv_emit_set_predication_state(cmd_buffer, cmd_buffer->state.predication_type, - cmd_buffer->state.predication_op, cmd_buffer->state.predication_va); + cmd_buffer->state.predication_op, pred_va); } } diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c index c77aeaac4e2..a9b0bb85c7f 100644 --- a/src/amd/vulkan/radv_cmd_buffer.c +++ b/src/amd/vulkan/radv_cmd_buffer.c @@ -9492,7 +9492,7 @@ radv_cs_emit_compute_predication(const struct radv_device *device, struct radv_c if (!state->predicating) return; - uint64_t va = state->predication_va; + uint64_t va = state->user_predication_va; if (!state->predication_type) { /* Invert the condition the first time it is needed. 
*/ @@ -13583,12 +13583,15 @@ radv_begin_conditional_rendering(struct radv_cmd_buffer *cmd_buffer, uint64_t va const struct radv_physical_device *pdev = radv_device_physical(device); struct radeon_cmdbuf *cs = cmd_buffer->cs; unsigned pred_op = PREDICATION_OP_BOOL32; + uint64_t emulated_va = 0; radv_emit_cache_flush(cmd_buffer); if (cmd_buffer->qf == RADV_QUEUE_GENERAL) { - if (!pdev->info.has_32bit_predication) { - uint64_t pred_value = 0, pred_va; + if (pdev->info.has_32bit_predication) { + radv_emit_set_predication_state(cmd_buffer, draw_visible, pred_op, va); + } else { + uint64_t pred_value = 0; unsigned pred_offset; /* From the Vulkan spec 1.1.107: @@ -13619,7 +13622,7 @@ radv_begin_conditional_rendering(struct radv_cmd_buffer *cmd_buffer, uint64_t va */ radv_cmd_buffer_upload_data(cmd_buffer, 8, &pred_value, &pred_offset); - pred_va = radv_buffer_get_va(cmd_buffer->upload.upload_bo) + pred_offset; + emulated_va = radv_buffer_get_va(cmd_buffer->upload.upload_bo) + pred_offset; radeon_check_space(device->ws, cmd_buffer->cs, 8); radeon_begin(cs); @@ -13629,18 +13632,17 @@ radv_begin_conditional_rendering(struct radv_cmd_buffer *cmd_buffer, uint64_t va COPY_DATA_WR_CONFIRM); radeon_emit(va); radeon_emit(va >> 32); - radeon_emit(pred_va); - radeon_emit(pred_va >> 32); + radeon_emit(emulated_va); + radeon_emit(emulated_va >> 32); radeon_emit(PKT3(PKT3_PFP_SYNC_ME, 0, 0)); radeon_emit(0); radeon_end(); - va = pred_va; pred_op = PREDICATION_OP_BOOL64; - } - radv_emit_set_predication_state(cmd_buffer, draw_visible, pred_op, va); + radv_emit_set_predication_state(cmd_buffer, draw_visible, pred_op, emulated_va); + } } else { /* Compute queue doesn't support predication and it's emulated elsewhere. 
*/ } @@ -13649,7 +13651,8 @@ radv_begin_conditional_rendering(struct radv_cmd_buffer *cmd_buffer, uint64_t va cmd_buffer->state.predicating = true; cmd_buffer->state.predication_type = draw_visible; cmd_buffer->state.predication_op = pred_op; - cmd_buffer->state.predication_va = va; + cmd_buffer->state.user_predication_va = va; + cmd_buffer->state.emulated_predication_va = emulated_va; cmd_buffer->state.mec_inv_pred_emitted = false; } @@ -13666,7 +13669,8 @@ radv_end_conditional_rendering(struct radv_cmd_buffer *cmd_buffer) cmd_buffer->state.predicating = false; cmd_buffer->state.predication_type = -1; cmd_buffer->state.predication_op = 0; - cmd_buffer->state.predication_va = 0; + cmd_buffer->state.user_predication_va = 0; + cmd_buffer->state.emulated_predication_va = 0; cmd_buffer->state.mec_inv_pred_emitted = false; } diff --git a/src/amd/vulkan/radv_cmd_buffer.h b/src/amd/vulkan/radv_cmd_buffer.h index e5afb9ebdc1..82eaf398aff 100644 --- a/src/amd/vulkan/radv_cmd_buffer.h +++ b/src/amd/vulkan/radv_cmd_buffer.h @@ -421,7 +421,8 @@ struct radv_cmd_state { /* Conditional rendering info. */ uint8_t predication_op; /* 32-bit or 64-bit predicate value */ int predication_type; /* -1: disabled, 0: normal, 1: inverted */ - uint64_t predication_va; + uint64_t user_predication_va; /* User predication VA. */ + uint64_t emulated_predication_va; /* Emulated VA if no 32-bit predication support. */ uint64_t mec_inv_pred_va; /* For inverted predication when using MEC. */ bool mec_inv_pred_emitted; /* To ensure we don't have to repeat inverting the VA. 
*/ bool saved_user_cond_render; diff --git a/src/amd/vulkan/radv_dgc.c b/src/amd/vulkan/radv_dgc.c index fca505f11dd..28afafb5da1 100644 --- a/src/amd/vulkan/radv_dgc.c +++ b/src/amd/vulkan/radv_dgc.c @@ -2826,7 +2826,7 @@ radv_prepare_dgc_compute(struct radv_cmd_buffer *cmd_buffer, const VkGeneratedCo if (cond_render_enabled) { params->predicating = true; - params->predication_va = state_cmd_buffer->state.predication_va; + params->predication_va = state_cmd_buffer->state.user_predication_va; params->predication_type = state_cmd_buffer->state.predication_type; }