mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-05 16:08:04 +02:00
radv: Add compute DGC preprocessing support.
This should reduce the overhead due to reduced syncs. Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/25835>
This commit is contained in:
parent
108227a84e
commit
c4fb827441
3 changed files with 56 additions and 17 deletions
|
|
@ -9519,18 +9519,6 @@ radv_CmdDrawMeshTasksIndirectCountEXT(VkCommandBuffer commandBuffer, VkBuffer _b
|
|||
static void radv_dgc_before_dispatch(struct radv_cmd_buffer *cmd_buffer);
|
||||
static void radv_dgc_after_dispatch(struct radv_cmd_buffer *cmd_buffer);
|
||||
|
||||
static bool
|
||||
radv_use_dgc_predication(struct radv_cmd_buffer *cmd_buffer, const VkGeneratedCommandsInfoNV *pGeneratedCommandsInfo)
|
||||
{
|
||||
VK_FROM_HANDLE(radv_buffer, seq_count_buffer, pGeneratedCommandsInfo->sequencesCountBuffer);
|
||||
|
||||
/* Enable conditional rendering (if not enabled by user) to skip prepare/execute DGC calls when
|
||||
* the indirect sequence count might be zero. This can only be enabled on GFX because on ACE it's
|
||||
* not possible to skip the execute DGC call (ie. no INDIRECT_PACKET)
|
||||
*/
|
||||
return cmd_buffer->qf == RADV_QUEUE_GENERAL && seq_count_buffer && !cmd_buffer->state.predicating;
|
||||
}
|
||||
|
||||
VKAPI_ATTR void VKAPI_CALL
|
||||
radv_CmdExecuteGeneratedCommandsNV(VkCommandBuffer commandBuffer, VkBool32 isPreprocessed,
|
||||
const VkGeneratedCommandsInfoNV *pGeneratedCommandsInfo)
|
||||
|
|
@ -9557,7 +9545,11 @@ radv_CmdExecuteGeneratedCommandsNV(VkCommandBuffer commandBuffer, VkBool32 isPre
|
|||
cmd_buffer->state.predicating = true;
|
||||
}
|
||||
|
||||
radv_prepare_dgc(cmd_buffer, pGeneratedCommandsInfo);
|
||||
if (!layout->use_preprocess) {
|
||||
radv_prepare_dgc(cmd_buffer, pGeneratedCommandsInfo);
|
||||
|
||||
cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_CS_PARTIAL_FLUSH | RADV_CMD_FLAG_INV_VCACHE | RADV_CMD_FLAG_INV_L2;
|
||||
}
|
||||
|
||||
if (compute) {
|
||||
radv_dgc_before_dispatch(cmd_buffer);
|
||||
|
|
|
|||
|
|
@ -1337,6 +1337,18 @@ radv_CreateIndirectCommandsLayoutNV(VkDevice _device, const VkIndirectCommandsLa
|
|||
if (!layout->indexed)
|
||||
layout->binds_index_buffer = false;
|
||||
|
||||
layout->use_preprocess = pCreateInfo->flags & VK_INDIRECT_COMMANDS_LAYOUT_USAGE_EXPLICIT_PREPROCESS_BIT_NV;
|
||||
|
||||
/* From the Vulkan spec (1.3.269, chapter 32):
|
||||
* "The bound descriptor sets and push constants that will be used with indirect command generation for the compute
|
||||
* piplines must already be specified at the time of preprocessing commands with vkCmdPreprocessGeneratedCommandsNV.
|
||||
* They must not change until the execution of indirect commands is submitted with vkCmdExecuteGeneratedCommandsNV."
|
||||
*
|
||||
* So we can always preprocess compute layouts.
|
||||
*/
|
||||
if (layout->pipeline_bind_point != VK_PIPELINE_BIND_POINT_COMPUTE)
|
||||
layout->use_preprocess = false;
|
||||
|
||||
*pIndirectCommandsLayout = radv_indirect_command_layout_to_handle(layout);
|
||||
return VK_SUCCESS;
|
||||
}
|
||||
|
|
@ -1379,12 +1391,45 @@ radv_GetGeneratedCommandsMemoryRequirementsNV(VkDevice _device,
|
|||
align(cmd_buf_size + upload_buf_size, pMemoryRequirements->memoryRequirements.alignment);
|
||||
}
|
||||
|
||||
bool
|
||||
radv_use_dgc_predication(struct radv_cmd_buffer *cmd_buffer, const VkGeneratedCommandsInfoNV *pGeneratedCommandsInfo)
|
||||
{
|
||||
VK_FROM_HANDLE(radv_buffer, seq_count_buffer, pGeneratedCommandsInfo->sequencesCountBuffer);
|
||||
|
||||
/* Enable conditional rendering (if not enabled by user) to skip prepare/execute DGC calls when
|
||||
* the indirect sequence count might be zero. This can only be enabled on GFX because on ACE it's
|
||||
* not possible to skip the execute DGC call (ie. no INDIRECT_PACKET)
|
||||
*/
|
||||
return cmd_buffer->qf == RADV_QUEUE_GENERAL && seq_count_buffer && !cmd_buffer->state.predicating;
|
||||
}
|
||||
|
||||
VKAPI_ATTR void VKAPI_CALL
|
||||
radv_CmdPreprocessGeneratedCommandsNV(VkCommandBuffer commandBuffer,
|
||||
const VkGeneratedCommandsInfoNV *pGeneratedCommandsInfo)
|
||||
{
|
||||
/* Can't do anything here as we depend on some dynamic state in some cases that we only know
|
||||
* at draw time. */
|
||||
VK_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
|
||||
VK_FROM_HANDLE(radv_indirect_command_layout, layout, pGeneratedCommandsInfo->indirectCommandsLayout);
|
||||
|
||||
if (!layout->use_preprocess)
|
||||
return;
|
||||
|
||||
const bool use_predication = radv_use_dgc_predication(cmd_buffer, pGeneratedCommandsInfo);
|
||||
|
||||
if (use_predication) {
|
||||
VK_FROM_HANDLE(radv_buffer, seq_count_buffer, pGeneratedCommandsInfo->sequencesCountBuffer);
|
||||
const uint64_t va = radv_buffer_get_va(seq_count_buffer->bo) + seq_count_buffer->offset +
|
||||
pGeneratedCommandsInfo->sequencesCountOffset;
|
||||
|
||||
radv_begin_conditional_rendering(cmd_buffer, va, true);
|
||||
cmd_buffer->state.predicating = true;
|
||||
}
|
||||
|
||||
radv_prepare_dgc(cmd_buffer, pGeneratedCommandsInfo);
|
||||
|
||||
if (use_predication) {
|
||||
cmd_buffer->state.predicating = false;
|
||||
radv_end_conditional_rendering(cmd_buffer);
|
||||
}
|
||||
}
|
||||
|
||||
/* Always need to call this directly before draw due to dependence on bound state. */
|
||||
|
|
@ -1658,8 +1703,6 @@ radv_prepare_dgc(struct radv_cmd_buffer *cmd_buffer, const VkGeneratedCommandsIn
|
|||
|
||||
radv_buffer_finish(&token_buffer);
|
||||
radv_meta_restore(&saved_state, cmd_buffer);
|
||||
|
||||
cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_CS_PARTIAL_FLUSH | RADV_CMD_FLAG_INV_VCACHE | RADV_CMD_FLAG_INV_L2;
|
||||
}
|
||||
|
||||
/* VK_NV_device_generated_commands_compute */
|
||||
|
|
|
|||
|
|
@ -3257,11 +3257,15 @@ struct radv_indirect_command_layout {
|
|||
uint32_t ibo_type_32;
|
||||
uint32_t ibo_type_8;
|
||||
|
||||
bool use_preprocess;
|
||||
|
||||
VkIndirectCommandsLayoutTokenNV tokens[0];
|
||||
};
|
||||
|
||||
uint32_t radv_get_indirect_cmdbuf_size(const VkGeneratedCommandsInfoNV *cmd_info);
|
||||
|
||||
bool radv_use_dgc_predication(struct radv_cmd_buffer *cmd_buffer,
|
||||
const VkGeneratedCommandsInfoNV *pGeneratedCommandsInfo);
|
||||
void radv_prepare_dgc(struct radv_cmd_buffer *cmd_buffer, const VkGeneratedCommandsInfoNV *pGeneratedCommandsInfo);
|
||||
|
||||
static inline uint32_t
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue