From d496f0b97d70e6c2862a7c5396c0d5178a9ca223 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Timur=20Krist=C3=B3f?= Date: Wed, 21 Dec 2022 17:32:57 +0100 Subject: [PATCH] radv: Decouple radv_before_taskmesh_draw from radv_before_draw. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit radv_before_taskmesh_draw will no longer call radv_before_draw and instead implement the necessary functionality on its own. radv_before_draw will no longer have to emit mesh shader descriptors. As a result, both functions should have a lower CPU overhead now. Signed-off-by: Timur Kristóf Reviewed-by: Samuel Pitoiset Part-of: --- src/amd/vulkan/radv_cmd_buffer.c | 91 +++++++++++++++++--------------- 1 file changed, 48 insertions(+), 43 deletions(-) diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c index 4c6d1c46b07..b59fb0b8e6c 100644 --- a/src/amd/vulkan/radv_cmd_buffer.c +++ b/src/amd/vulkan/radv_cmd_buffer.c @@ -4890,7 +4890,7 @@ radv_upload_graphics_shader_descriptors(struct radv_cmd_buffer *cmd_buffer) radv_flush_streamout_descriptors(cmd_buffer); - VkShaderStageFlags stages = VK_SHADER_STAGE_ALL_GRAPHICS | VK_SHADER_STAGE_MESH_BIT_EXT; + VkShaderStageFlags stages = VK_SHADER_STAGE_ALL_GRAPHICS; radv_flush_descriptors(cmd_buffer, stages, &pipeline->base, VK_PIPELINE_BIND_POINT_GRAPHICS); radv_flush_constants(cmd_buffer, stages, &pipeline->base, VK_PIPELINE_BIND_POINT_GRAPHICS); radv_flush_ngg_query_state(cmd_buffer); @@ -8626,61 +8626,66 @@ ALWAYS_INLINE static bool radv_before_taskmesh_draw(struct radv_cmd_buffer *cmd_buffer, const struct radv_draw_info *info, uint32_t drawCount) { - struct radv_descriptor_state *descriptors_state = - radv_get_descriptors_state(cmd_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS); + /* For direct draws, this makes sure we don't draw anything. + * For indirect draws, this is necessary to prevent a GPU hang (on MEC version < 100). + */ + if (unlikely(!info->count)) + return false; + + struct radv_graphics_pipeline *pipeline = cmd_buffer->state.graphics_pipeline; + struct radv_physical_device *pdevice = cmd_buffer->device->physical_device; + struct radeon_cmdbuf *ace_cs = cmd_buffer->ace_internal.cs; + struct radv_shader *task_shader = radv_get_shader(&pipeline->base, MESA_SHADER_TASK); + + assert(!task_shader || ace_cs); + + const VkShaderStageFlags stages = VK_SHADER_STAGE_MESH_BIT_EXT | VK_SHADER_STAGE_FRAGMENT_BIT | (task_shader ? VK_SHADER_STAGE_TASK_BIT_EXT : 0); const bool pipeline_is_dirty = cmd_buffer->state.dirty & RADV_CMD_DIRTY_PIPELINE && cmd_buffer->state.graphics_pipeline != cmd_buffer->state.emitted_graphics_pipeline; - const bool push_dirty = descriptors_state->push_dirty; - const uint32_t desc_dirty = descriptors_state->dirty; + const bool need_task_semaphore = task_shader && radv_flush_gfx2ace_semaphore(cmd_buffer); - const bool gfx_result = radv_before_draw(cmd_buffer, info, drawCount); - struct radv_graphics_pipeline *pipeline = cmd_buffer->state.graphics_pipeline; - struct radv_shader *task_shader = radv_get_shader(&pipeline->base, MESA_SHADER_TASK); + ASSERTED const unsigned cdw_max = + radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, 4096 + 128 * (drawCount - 1)); + ASSERTED const unsigned ace_cdw_max = !ace_cs ? 0 : + radeon_check_space(cmd_buffer->device->ws, ace_cs, 4096 + 128 * (drawCount - 1)); - /* If there is no task shader, no need to do anything special. */ - if (!task_shader) - return gfx_result; + if (cmd_buffer->state.dirty & RADV_CMD_DIRTY_FRAMEBUFFER) + radv_emit_fb_mip_change_flush(cmd_buffer); - /* Need to check the count even for indirect draws to work around - * an issue with DISPATCH_TASKMESH_INDIRECT_MULTI_ACE. - */ - if (!info->count || !gfx_result) - return false; - - const bool need_task_semaphore = radv_flush_gfx2ace_semaphore(cmd_buffer); - struct radv_physical_device *pdevice = cmd_buffer->device->physical_device; - struct radeon_cmdbuf *ace_cs = cmd_buffer->ace_internal.cs; - struct radeon_winsys *ws = cmd_buffer->device->ws; - - assert(ace_cs); - ASSERTED const unsigned ace_cdw_max = - radeon_check_space(ws, ace_cs, 4096 + 128 * (drawCount - 1)); - - if (need_task_semaphore) - radv_wait_gfx2ace_semaphore(cmd_buffer); - - if (pipeline_is_dirty) { + radv_emit_all_graphics_states(cmd_buffer, info, pipeline_is_dirty); + if (task_shader && pipeline_is_dirty) { radv_pipeline_emit_hw_cs(pdevice, ace_cs, task_shader); radv_pipeline_emit_compute_state(pdevice, ace_cs, task_shader); } - radv_ace_internal_cache_flush(cmd_buffer); + si_emit_cache_flush(cmd_buffer); - /* Restore dirty state of descriptors - * They were marked non-dirty in radv_before_draw, - * but they need to be re-emitted now to the ACE cmdbuf. - */ - descriptors_state->push_dirty = push_dirty; - descriptors_state->dirty = desc_dirty; + if (task_shader) { + radv_ace_internal_cache_flush(cmd_buffer); - /* Flush descriptors and push constants for task shaders. */ - radv_flush_descriptors(cmd_buffer, VK_SHADER_STAGE_TASK_BIT_EXT, &pipeline->base, - VK_PIPELINE_BIND_POINT_GRAPHICS); - radv_flush_constants(cmd_buffer, VK_SHADER_STAGE_TASK_BIT_EXT, &pipeline->base, - VK_PIPELINE_BIND_POINT_GRAPHICS); + if (need_task_semaphore) { + radv_wait_gfx2ace_semaphore(cmd_buffer); + } + } + + radv_flush_descriptors(cmd_buffer, stages, &pipeline->base, VK_PIPELINE_BIND_POINT_GRAPHICS); + radv_flush_constants(cmd_buffer, stages, &pipeline->base, VK_PIPELINE_BIND_POINT_GRAPHICS); + + radv_describe_draw(cmd_buffer); + if (likely(!info->indirect)) { + struct radv_cmd_state *state = &cmd_buffer->state; + if (unlikely(state->last_num_instances != 1)) { + struct radeon_cmdbuf *cs = cmd_buffer->cs; + radeon_emit(cs, PKT3(PKT3_NUM_INSTANCES, 0, false)); + radeon_emit(cs, 1); + state->last_num_instances = 1; + } + } + + assert(cmd_buffer->cs->cdw <= cdw_max); + assert(!ace_cs || ace_cs->cdw <= ace_cdw_max); - assert(ace_cs->cdw <= ace_cdw_max); return true; }