diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c index aabb9de440e..b9bd4f5cd42 100644 --- a/src/amd/vulkan/radv_cmd_buffer.c +++ b/src/amd/vulkan/radv_cmd_buffer.c @@ -9913,7 +9913,21 @@ radv_CmdExecuteGeneratedCommandsNV(VkCommandBuffer commandBuffer, VkBool32 isPre } if (!radv_dgc_can_preprocess(layout, pipeline)) { - radv_prepare_dgc(cmd_buffer, pGeneratedCommandsInfo); + const bool old_predicating = cmd_buffer->state.predicating; + + if (cmd_buffer->qf == RADV_QUEUE_COMPUTE && cmd_buffer->state.predicating) { + /* Suspend conditional rendering when the DGC execute is called on the compute queue to + * generate a cmdbuf which will skips dispatches when necessary. This is because the + * compute queue is missing IB2 which means it's not possible to skip the cmdbuf entirely. + */ + cmd_buffer->state.predicating = false; + } + + radv_prepare_dgc(cmd_buffer, pGeneratedCommandsInfo, old_predicating); + + if (cmd_buffer->qf == RADV_QUEUE_COMPUTE) { + cmd_buffer->state.predicating = old_predicating; + } cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_CS_PARTIAL_FLUSH | RADV_CMD_FLAG_INV_VCACHE | RADV_CMD_FLAG_INV_L2; } diff --git a/src/amd/vulkan/radv_device_generated_commands.c b/src/amd/vulkan/radv_device_generated_commands.c index ff46436b328..b33271f41fb 100644 --- a/src/amd/vulkan/radv_device_generated_commands.c +++ b/src/amd/vulkan/radv_device_generated_commands.c @@ -230,6 +230,11 @@ struct radv_dgc_params { uint8_t is_dispatch; uint8_t use_preamble; + + /* For conditional rendering on ACE. */ + uint8_t predicating; + uint8_t predication_type; + uint64_t predication_va; }; enum { @@ -1194,6 +1199,28 @@ dgc_emit_draw_mesh_tasks(nir_builder *b, struct dgc_cmdbuf *cs, nir_def *stream_ nir_pop_if(b, NULL); } +static nir_def * +dgc_is_cond_render_enabled(nir_builder *b) +{ + nir_def *res1, *res2; + + nir_push_if(b, nir_ieq_imm(b, load_param8(b, predicating), 1)); + { + nir_def *val = nir_load_global(b, load_param64(b, predication_va), 4, 1, 32); + /* By default, all rendering commands are discarded if the 32-bit value is zero. If the + * inverted flag is set, they are discarded if the value is non-zero. + */ + res1 = nir_ixor(b, nir_i2b(b, load_param8(b, predication_type)), nir_ine_imm(b, val, 0)); + } + nir_push_else(b, 0); + { + res2 = nir_imm_bool(b, false); + } + nir_pop_if(b, 0); + + return nir_if_phi(b, res1, res2); +} + static nir_shader * build_dgc_prepare_shader(struct radv_device *dev) { @@ -1231,6 +1258,15 @@ build_dgc_prepare_shader(struct radv_device *dev) } nir_pop_if(&b, NULL); + nir_push_if(&b, dgc_is_cond_render_enabled(&b)); + { + /* Reset the number of sequences when conditional rendering is enabled in order to skip the + * entire shader and pad the cmdbuf with NOPs. + */ + nir_store_var(&b, count_var, nir_imm_int(&b, 0), 0x1); + } + nir_pop_if(&b, NULL); + sequence_count = nir_load_var(&b, count_var); nir_push_if(&b, nir_ult(&b, sequence_id, sequence_count)); @@ -1634,7 +1670,7 @@ radv_CmdPreprocessGeneratedCommandsNV(VkCommandBuffer commandBuffer, const bool old_predicating = cmd_buffer->state.predicating; cmd_buffer->state.predicating = false; - radv_prepare_dgc(cmd_buffer, pGeneratedCommandsInfo); + radv_prepare_dgc(cmd_buffer, pGeneratedCommandsInfo, false); /* Restore conditional rendering. */ cmd_buffer->state.predicating = old_predicating; @@ -1716,7 +1752,7 @@ radv_prepare_dgc_graphics(struct radv_cmd_buffer *cmd_buffer, const VkGeneratedC static void radv_prepare_dgc_compute(struct radv_cmd_buffer *cmd_buffer, const VkGeneratedCommandsInfoNV *pGeneratedCommandsInfo, unsigned *upload_size, unsigned *upload_offset, void **upload_data, - struct radv_dgc_params *params) + struct radv_dgc_params *params, bool cond_render_enabled) { VK_FROM_HANDLE(radv_indirect_command_layout, layout, pGeneratedCommandsInfo->indirectCommandsLayout); VK_FROM_HANDLE(radv_pipeline, pipeline, pGeneratedCommandsInfo->pipeline); @@ -1741,6 +1777,12 @@ radv_prepare_dgc_compute(struct radv_cmd_buffer *cmd_buffer, const VkGeneratedCo params->dispatch_initiator = dispatch_initiator; params->is_dispatch = 1; + if (cond_render_enabled) { + params->predicating = true; + params->predication_va = cmd_buffer->state.predication_va; + params->predication_type = cmd_buffer->state.predication_type; + } + const struct radv_userdata_info *loc = radv_get_user_sgpr(cs, AC_UD_CS_GRID_SIZE); if (loc->sgpr_idx != -1) { params->grid_base_sgpr = (cs->info.user_data_0 + 4 * loc->sgpr_idx - SI_SH_REG_OFFSET) >> 2; @@ -1748,7 +1790,8 @@ radv_prepare_dgc_compute(struct radv_cmd_buffer *cmd_buffer, const VkGeneratedCo } void -radv_prepare_dgc(struct radv_cmd_buffer *cmd_buffer, const VkGeneratedCommandsInfoNV *pGeneratedCommandsInfo) +radv_prepare_dgc(struct radv_cmd_buffer *cmd_buffer, const VkGeneratedCommandsInfoNV *pGeneratedCommandsInfo, + bool cond_render_enabled) { VK_FROM_HANDLE(radv_indirect_command_layout, layout, pGeneratedCommandsInfo->indirectCommandsLayout); VK_FROM_HANDLE(radv_pipeline, pipeline, pGeneratedCommandsInfo->pipeline); @@ -1792,7 +1835,8 @@ radv_prepare_dgc(struct radv_cmd_buffer *cmd_buffer, const VkGeneratedCommandsIn ¶ms); } else { assert(layout->pipeline_bind_point == VK_PIPELINE_BIND_POINT_COMPUTE); - radv_prepare_dgc_compute(cmd_buffer, pGeneratedCommandsInfo, &upload_size, &upload_offset, &upload_data, ¶ms); + radv_prepare_dgc_compute(cmd_buffer, pGeneratedCommandsInfo, &upload_size, &upload_offset, &upload_data, ¶ms, + cond_render_enabled); } if (layout->push_constant_mask) { diff --git a/src/amd/vulkan/radv_private.h b/src/amd/vulkan/radv_private.h index f35e1a35e03..3a435e7b0ae 100644 --- a/src/amd/vulkan/radv_private.h +++ b/src/amd/vulkan/radv_private.h @@ -3282,7 +3282,8 @@ uint32_t radv_get_indirect_cmdbuf_size(const VkGeneratedCommandsInfoNV *cmd_info bool radv_use_dgc_predication(struct radv_cmd_buffer *cmd_buffer, const VkGeneratedCommandsInfoNV *pGeneratedCommandsInfo); -void radv_prepare_dgc(struct radv_cmd_buffer *cmd_buffer, const VkGeneratedCommandsInfoNV *pGeneratedCommandsInfo); +void radv_prepare_dgc(struct radv_cmd_buffer *cmd_buffer, const VkGeneratedCommandsInfoNV *pGeneratedCommandsInfo, + bool cond_render_enabled); bool radv_dgc_can_preprocess(const struct radv_indirect_command_layout *layout, struct radv_pipeline *pipeline); diff --git a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c index 889284f932f..29f6a5ff75d 100644 --- a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c +++ b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c @@ -779,8 +779,6 @@ radv_amdgpu_cs_execute_ib(struct radeon_cmdbuf *_cs, struct radeon_winsys_bo *bo const uint32_t ib_size = radv_amdgpu_cs_get_initial_size(cs->ws, cs->hw_ip); VkResult result; - assert(!predicate); - /* Finalize the current CS without chaining to execute the external IB. */ radv_amdgpu_cs_finalize(_cs);