diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c
index e17c7b230e1..be9a929e3d1 100644
--- a/src/amd/vulkan/radv_cmd_buffer.c
+++ b/src/amd/vulkan/radv_cmd_buffer.c
@@ -533,6 +533,16 @@ radv_reset_cmd_buffer(struct radv_cmd_buffer *cmd_buffer)
       cmd_buffer->descriptors[i].push_dirty = false;
    }
 
+   if (cmd_buffer->device->physical_device->rad_info.gfx_level >= GFX7) {
+      uint32_t pred_value = 0;
+      uint32_t pred_offset;
+      if (!radv_cmd_buffer_upload_data(cmd_buffer, 4, &pred_value, &pred_offset))
+         cmd_buffer->record_result = VK_ERROR_OUT_OF_HOST_MEMORY;
+
+      cmd_buffer->mec_inv_pred_emitted = false;
+      cmd_buffer->mec_inv_pred_va = radv_buffer_get_va(cmd_buffer->upload.upload_bo) + pred_offset;
+   }
+
    if (cmd_buffer->device->physical_device->rad_info.gfx_level >= GFX9 &&
        cmd_buffer->qf == RADV_QUEUE_GENERAL) {
       unsigned num_db = cmd_buffer->device->physical_device->rad_info.max_render_backends;
@@ -6301,18 +6311,56 @@ radv_emit_view_index(struct radv_cmd_buffer *cmd_buffer, unsigned index)
  * Emulates predication for MEC using COND_EXEC.
  * When the current command buffer is predicating, emit a COND_EXEC packet
  * so that the MEC skips the next few dwords worth of packets.
+ *
+ * To make it work with inverted conditional rendering, we allocate
+ * space in the upload BO and emit some packets to invert the condition.
  */
 static void
-radv_cs_emit_compute_predication(struct radv_cmd_buffer *cmd_buffer, unsigned dwords)
+radv_cs_emit_compute_predication(struct radv_cmd_state *state, struct radeon_cmdbuf *cs,
+                                 uint64_t inv_va, bool *inv_emitted, unsigned dwords)
 {
-   if (!cmd_buffer->state.predicating)
+   if (!state->predicating)
       return;
 
-   struct radeon_cmdbuf *cs = cmd_buffer->cs;
+   uint64_t va = state->predication_va;
+
+   if (!state->predication_type) {
+      /* Invert the condition the first time it is needed. */
+      if (!*inv_emitted) {
+         *inv_emitted = true;
+
+         /* Write 1 to the inverted predication VA. */
+         radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0));
+         radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_IMM) | COPY_DATA_DST_SEL(COPY_DATA_DST_MEM) |
+                         COPY_DATA_WR_CONFIRM);
+         radeon_emit(cs, 1);
+         radeon_emit(cs, 0);
+         radeon_emit(cs, inv_va);
+         radeon_emit(cs, inv_va >> 32);
+
+         /* If the API predication VA == 0, skip next command. */
+         radeon_emit(cs, PKT3(PKT3_COND_EXEC, 3, 0));
+         radeon_emit(cs, va);
+         radeon_emit(cs, va >> 32);
+         radeon_emit(cs, 0);
+         radeon_emit(cs, 6); /* 1x COPY_DATA size */
+
+         /* Write 0 to the new predication VA (when the API condition != 0) */
+         radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0));
+         radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_IMM) | COPY_DATA_DST_SEL(COPY_DATA_DST_MEM) |
+                         COPY_DATA_WR_CONFIRM);
+         radeon_emit(cs, 0);
+         radeon_emit(cs, 0);
+         radeon_emit(cs, inv_va);
+         radeon_emit(cs, inv_va >> 32);
+      }
+
+      va = inv_va;
+   }
 
    radeon_emit(cs, PKT3(PKT3_COND_EXEC, 3, 0));
-   radeon_emit(cs, cmd_buffer->state.predication_va);
-   radeon_emit(cs, cmd_buffer->state.predication_va >> 32);
+   radeon_emit(cs, va);
+   radeon_emit(cs, va >> 32);
    radeon_emit(cs, 0);      /* Cache policy */
    radeon_emit(cs, dwords); /* Size of the predicated packet(s) in DWORDs. */
 }
@@ -7649,7 +7697,9 @@ radv_emit_dispatch_packets(struct radv_cmd_buffer *cmd_buffer,
       }
 
       if (radv_cmd_buffer_uses_mec(cmd_buffer)) {
-         radv_cs_emit_compute_predication(cmd_buffer, 4 /* DISPATCH_INDIRECT size */);
+         radv_cs_emit_compute_predication(&cmd_buffer->state, cs, cmd_buffer->mec_inv_pred_va,
+                                          &cmd_buffer->mec_inv_pred_emitted,
+                                          4 /* DISPATCH_INDIRECT size */);
          radeon_emit(cs, PKT3(PKT3_DISPATCH_INDIRECT, 2, 0) | PKT3_SHADER_TYPE_S(1));
          radeon_emit(cs, info->va);
          radeon_emit(cs, info->va >> 32);
@@ -7732,7 +7782,9 @@ radv_emit_dispatch_packets(struct radv_cmd_buffer *cmd_buffer,
       }
 
      if (radv_cmd_buffer_uses_mec(cmd_buffer)) {
-        radv_cs_emit_compute_predication(cmd_buffer, 5 /* DISPATCH_DIRECT size */);
+        radv_cs_emit_compute_predication(&cmd_buffer->state, cs, cmd_buffer->mec_inv_pred_va,
+                                         &cmd_buffer->mec_inv_pred_emitted,
+                                         5 /* DISPATCH_DIRECT size */);
         predicating = false;
      }
 
@@ -9096,54 +9148,8 @@ radv_CmdBeginConditionalRenderingEXT(
       pred_op = PREDICATION_OP_BOOL64;
    }
 
-
-   if (radv_cmd_buffer_uses_mec(cmd_buffer)) {
-      /* MEC does not support real predication, so we have to emulate that
-       * using COND_EXEC packets before each dispatch.
-       */
-
-      if (!draw_visible) {
-         /* COND_EXEC can only skip the next packet(s) when the condition is 0.
-          * When using inverted conditional rendering, we allocate some space in
-          * the upload BO and emit some packets to invert the condition.
-          */
-
-         uint64_t pred_value = 0, pred_va;
-         unsigned pred_offset;
-
-         radv_cmd_buffer_upload_data(cmd_buffer, 4, &pred_value, &pred_offset);
-         pred_va = radv_buffer_get_va(cmd_buffer->upload.upload_bo) + pred_offset;
-
-         /* Write 1 to the new predication VA. */
-         radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0));
-         radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_IMM) | COPY_DATA_DST_SEL(COPY_DATA_DST_MEM) |
-                         COPY_DATA_WR_CONFIRM);
-         radeon_emit(cs, 1);
-         radeon_emit(cs, 0);
-         radeon_emit(cs, pred_va);
-         radeon_emit(cs, pred_va >> 32);
-
-         /* If the API predication VA == 0, skip next command. */
-         radeon_emit(cs, PKT3(PKT3_COND_EXEC, 3, 0));
-         radeon_emit(cs, va);
-         radeon_emit(cs, va >> 32);
-         radeon_emit(cs, 0);
-         radeon_emit(cs, 6); /* 1x COPY_DATA size */
-
-         /* Write 0 to the new predication VA (when the API condition != 0) */
-         radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0));
-         radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_IMM) | COPY_DATA_DST_SEL(COPY_DATA_DST_MEM) |
-                         COPY_DATA_WR_CONFIRM);
-         radeon_emit(cs, 0);
-         radeon_emit(cs, 0);
-         radeon_emit(cs, pred_va);
-         radeon_emit(cs, pred_va >> 32);
-
-         va = pred_va;
-         draw_visible = true;
-      }
-   } else {
-      /* Enable predication for this command buffer. */
+   /* MEC doesn't support predication, we emulate it elsewhere. */
+   if (!radv_cmd_buffer_uses_mec(cmd_buffer)) {
       si_emit_set_predication_state(cmd_buffer, draw_visible, pred_op, va);
    }
 
@@ -9152,6 +9158,7 @@ radv_CmdBeginConditionalRenderingEXT(
    cmd_buffer->state.predication_type = draw_visible;
    cmd_buffer->state.predication_op = pred_op;
    cmd_buffer->state.predication_va = va;
+   cmd_buffer->mec_inv_pred_emitted = false;
 }
 
 VKAPI_ATTR void VKAPI_CALL
@@ -9159,11 +9166,8 @@ radv_CmdEndConditionalRenderingEXT(VkCommandBuffer commandBuffer)
 {
    RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
 
-   /* Note, MEC doesn't support predication, so we
-    * don't have to emit anything for MEC here.
-    */
+   /* MEC doesn't support predication, no need to emit anything here. */
    if (!radv_cmd_buffer_uses_mec(cmd_buffer)) {
-      /* Disable predication for this command buffer. */
       si_emit_set_predication_state(cmd_buffer, false, 0, 0);
    }
 
@@ -9172,6 +9176,7 @@ radv_CmdEndConditionalRenderingEXT(VkCommandBuffer commandBuffer)
    cmd_buffer->state.predication_type = -1;
    cmd_buffer->state.predication_op = 0;
    cmd_buffer->state.predication_va = 0;
+   cmd_buffer->mec_inv_pred_emitted = false;
 }
 
 /* VK_EXT_transform_feedback */
diff --git a/src/amd/vulkan/radv_private.h b/src/amd/vulkan/radv_private.h
index ca1df44922c..1f3676844e5 100644
--- a/src/amd/vulkan/radv_private.h
+++ b/src/amd/vulkan/radv_private.h
@@ -1634,6 +1634,9 @@ struct radv_cmd_buffer {
    uint32_t gfx9_fence_idx;
    uint64_t gfx9_eop_bug_va;
 
+   uint64_t mec_inv_pred_va;  /* For inverted predication when using MEC. */
+   bool mec_inv_pred_emitted; /* To ensure we don't have to repeat inverting the VA. */
+
    /**
    * Whether a query pool has been resetted and we have to flush caches.
    */
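
For reference, the condition inversion that radv_cs_emit_compute_predication performs with COPY_DATA/COND_EXEC packets can be modeled on the CPU as the minimal sketch below. This is illustrative only and not part of the patch: api_pred, inv_pred, inverted and the function name are stand-ins for the 32-bit value at state->predication_va, the value at inv_va (cmd_buffer->mec_inv_pred_va) and state->predication_type. It assumes COND_EXEC skips the packets that follow it when the value at its address is 0, which is why an inverted copy of the condition has to be materialized in the upload BO.

/* CPU-side model of the packet sequence above (illustrative, not driver code). */
#include <stdbool.h>
#include <stdint.h>

static bool
mec_predication_skips_packet(uint32_t api_pred, bool inverted,
                             uint32_t *inv_pred, bool *inv_emitted)
{
   uint32_t pred = api_pred;

   if (inverted) {
      /* Emit the inversion packets only once per predication range. */
      if (!*inv_emitted) {
         *inv_emitted = true;
         *inv_pred = 1;       /* COPY_DATA: write 1 to inv_va */
         if (api_pred != 0)   /* COND_EXEC skips the next COPY_DATA when *va == 0 */
            *inv_pred = 0;    /* COPY_DATA: write 0 to inv_va */
      }
      pred = *inv_pred;       /* predicate on the inverted value instead */
   }

   /* Final COND_EXEC: the following packet(s) are skipped when pred == 0. */
   return pred == 0;
}

Because the inverted value lives in the per-command-buffer upload BO allocated in radv_reset_cmd_buffer, the inversion packets only need to be emitted before the first predicated dispatch of a predication range; that is what mec_inv_pred_emitted tracks.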