diff --git a/src/panfrost/vulkan/csf/panvk_cmd_buffer.h b/src/panfrost/vulkan/csf/panvk_cmd_buffer.h
index 08939467348..f1b75e29aae 100644
--- a/src/panfrost/vulkan/csf/panvk_cmd_buffer.h
+++ b/src/panfrost/vulkan/csf/panvk_cmd_buffer.h
@@ -462,4 +462,12 @@ void panvk_per_arch(get_cs_deps)(struct panvk_cmd_buffer *cmdbuf,
                                  const VkDependencyInfo *in,
                                  struct panvk_cs_deps *out);
 
+void panvk_per_arch(cmd_prepare_exec_cmd_for_draws)(
+   struct panvk_cmd_buffer *primary,
+   struct panvk_cmd_buffer *secondary);
+
+void panvk_per_arch(cmd_inherit_render_state)(
+   struct panvk_cmd_buffer *cmdbuf,
+   const VkCommandBufferBeginInfo *pBeginInfo);
+
 #endif /* PANVK_CMD_BUFFER_H */
diff --git a/src/panfrost/vulkan/csf/panvk_vX_cmd_buffer.c b/src/panfrost/vulkan/csf/panvk_vX_cmd_buffer.c
index 497b417cbe0..af58e136b51 100644
--- a/src/panfrost/vulkan/csf/panvk_vX_cmd_buffer.c
+++ b/src/panfrost/vulkan/csf/panvk_vX_cmd_buffer.c
@@ -123,9 +123,9 @@ finish_cs(struct panvk_cmd_buffer *cmdbuf, uint32_t subqueue)
    cs_load32_to(b, error, debug_sync_addr,
                 offsetof(struct panvk_cs_sync32, error));
    cs_wait_slots(b, SB_ALL_MASK, false);
-   cs_sync32_add(b, true, MALI_CS_SYNC_SCOPE_SYSTEM, one, debug_sync_addr,
-                 cs_now());
-
+   if (cmdbuf->vk.level == VK_COMMAND_BUFFER_LEVEL_PRIMARY)
+      cs_sync32_add(b, true, MALI_CS_SYNC_SCOPE_SYSTEM, one,
+                    debug_sync_addr, cs_now());
    cs_match(b, error, cmp_scratch) {
       cs_case(b, 0) {
          /* Do nothing. */
@@ -701,5 +701,90 @@ panvk_per_arch(BeginCommandBuffer)(VkCommandBuffer commandBuffer,
    if (instance->debug_flags & PANVK_DEBUG_TRACE)
       cmdbuf->flags &= ~VK_COMMAND_BUFFER_USAGE_SIMULTANEOUS_USE_BIT;
 
+   panvk_per_arch(cmd_inherit_render_state)(cmdbuf, pBeginInfo);
+
    return VK_SUCCESS;
 }
+
+static void
+panvk_cmd_invalidate_state(struct panvk_cmd_buffer *cmdbuf)
+{
+   /* From the Vulkan 1.3.275 spec:
+    *
+    *    "...There is one exception to this rule - if the primary command
+    *    buffer is inside a render pass instance, then the render pass and
+    *    subpass state is not disturbed by executing secondary command
+    *    buffers."
+    *
+    * We need to reset everything EXCEPT the render pass state.
+    */
+   struct panvk_rendering_state render_save = cmdbuf->state.gfx.render;
+   memset(&cmdbuf->state.gfx, 0, sizeof(cmdbuf->state.gfx));
+   cmdbuf->state.gfx.render = render_save;
+
+   cmdbuf->state.gfx.fs.desc.res_table = 0;
+   cmdbuf->state.gfx.fs.spd = 0;
+   cmdbuf->state.gfx.vs.desc.res_table = 0;
+   cmdbuf->state.gfx.vs.spds.pos = 0;
+   cmdbuf->state.gfx.vs.spds.var = 0;
+   cmdbuf->state.gfx.vb.dirty = true;
+   cmdbuf->state.gfx.ib.dirty = true;
+
+   vk_dynamic_graphics_state_dirty_all(&cmdbuf->vk.dynamic_graphics_state);
+}
+
+VKAPI_ATTR void VKAPI_CALL
+panvk_per_arch(CmdExecuteCommands)(VkCommandBuffer commandBuffer,
+                                   uint32_t commandBufferCount,
+                                   const VkCommandBuffer *pCommandBuffers)
+{
+   VK_FROM_HANDLE(panvk_cmd_buffer, primary, commandBuffer);
+
+   if (commandBufferCount == 0)
+      return;
+
+   for (uint32_t i = 0; i < commandBufferCount; i++) {
+      VK_FROM_HANDLE(panvk_cmd_buffer, secondary, pCommandBuffers[i]);
+
+      /* Make sure the CS context is set up properly to inherit the
+       * primary command buffer state.
+       */
+      primary->state.tls.info.tls.size =
+         MAX2(primary->state.tls.info.tls.size,
+              secondary->state.tls.info.tls.size);
+      panvk_per_arch(cmd_prepare_exec_cmd_for_draws)(primary, secondary);
+
+      for (uint32_t j = 0; j < ARRAY_SIZE(primary->state.cs); j++) {
+         struct cs_builder *sec_b = panvk_get_cs_builder(secondary, j);
+         assert(cs_is_valid(sec_b));
+         if (!cs_is_empty(sec_b)) {
+            struct cs_builder *prim_b = panvk_get_cs_builder(primary, j);
+            struct cs_index addr = cs_scratch_reg64(prim_b, 0);
+            struct cs_index size = cs_scratch_reg32(prim_b, 2);
+            cs_move64_to(prim_b, addr, cs_root_chunk_gpu_addr(sec_b));
+            cs_move32_to(prim_b, size, cs_root_chunk_size(sec_b));
+            cs_call(prim_b, addr, size);
+         }
+      }
+   }
+
+   /* From the Vulkan 1.3.275 spec:
+    *
+    *    "When secondary command buffer(s) are recorded to execute on a
+    *    primary command buffer, the secondary command buffer inherits no
+    *    state from the primary command buffer, and all state of the primary
+    *    command buffer is undefined after an execute secondary command buffer
+    *    command is recorded. There is one exception to this rule - if the
+    *    primary command buffer is inside a render pass instance, then the
+    *    render pass and subpass state is not disturbed by executing secondary
+    *    command buffers. For state dependent commands (such as draws and
+    *    dispatches), any state consumed by those commands must not be
+    *    undefined."
+    *
+    * Therefore, it's the client's job to reset all the state in the primary
+    * after the secondary executes. However, if we're doing any internal
+    * dirty tracking, we may miss the fact that a secondary has messed with
+    * GPU state if we don't invalidate all our internal tracking.
+    */
+   panvk_cmd_invalidate_state(primary);
+}
diff --git a/src/panfrost/vulkan/csf/panvk_vX_cmd_draw.c b/src/panfrost/vulkan/csf/panvk_vX_cmd_draw.c
index 96695c29088..85ce6c134a6 100644
--- a/src/panfrost/vulkan/csf/panvk_vX_cmd_draw.c
+++ b/src/panfrost/vulkan/csf/panvk_vX_cmd_draw.c
@@ -39,6 +39,7 @@
 #include "vk_format.h"
 #include "vk_meta.h"
 #include "vk_pipeline_layout.h"
+#include "vk_render_pass.h"
 
 struct panvk_draw_info {
    struct {
@@ -1376,13 +1377,16 @@ prepare_draw(struct panvk_cmd_buffer *cmdbuf, struct panvk_draw_info *draw)
    if (result != VK_SUCCESS)
       return result;
 
-   result = get_tiler_desc(cmdbuf);
-   if (result != VK_SUCCESS)
-      return result;
+   if (cmdbuf->vk.level == VK_COMMAND_BUFFER_LEVEL_PRIMARY ||
+       !(cmdbuf->flags & VK_COMMAND_BUFFER_USAGE_RENDER_PASS_CONTINUE_BIT)) {
+      result = get_tiler_desc(cmdbuf);
+      if (result != VK_SUCCESS)
+         return result;
 
-   result = get_fb_descs(cmdbuf);
-   if (result != VK_SUCCESS)
-      return result;
+      result = get_fb_descs(cmdbuf);
+      if (result != VK_SUCCESS)
+         return result;
+   }
 
    struct cs_builder *b =
       panvk_get_cs_builder(cmdbuf, PANVK_SUBQUEUE_VERTEX_TILER);
@@ -1510,6 +1514,25 @@ panvk_cmd_draw(struct panvk_cmd_buffer *cmdbuf, struct panvk_draw_info *draw)
    cs_req_res(b, 0);
 }
 
+void
+panvk_per_arch(cmd_prepare_exec_cmd_for_draws)(
+   struct panvk_cmd_buffer *primary,
+   struct panvk_cmd_buffer *secondary)
+{
+   VkResult result;
+
+   if (secondary->flags & VK_COMMAND_BUFFER_USAGE_RENDER_PASS_CONTINUE_BIT) {
+      assert(primary->vk.render_pass);
+      result = get_tiler_desc(primary);
+      if (result != VK_SUCCESS)
+         return;
+
+      result = get_fb_descs(primary);
+      if (result != VK_SUCCESS)
+         return;
+   }
+}
+
 VKAPI_ATTR void VKAPI_CALL
 panvk_per_arch(CmdDraw)(VkCommandBuffer commandBuffer, uint32_t vertexCount,
                         uint32_t instanceCount, uint32_t firstVertex,
                         uint32_t firstInstance)
@@ -1646,8 +1669,8 @@ panvk_per_arch(CmdDrawIndexedIndirect)(VkCommandBuffer commandBuffer,
 }
 
 static void
-panvk_cmd_begin_rendering_init_state(struct panvk_cmd_buffer *cmdbuf,
-                                     const VkRenderingInfo *pRenderingInfo)
+panvk_cmd_init_render_state(struct panvk_cmd_buffer *cmdbuf,
+                            const VkRenderingInfo *pRenderingInfo)
 {
    struct panvk_device *dev = to_panvk_device(cmdbuf->vk.base.device);
    struct panvk_physical_device *phys_dev =
@@ -1657,10 +1680,6 @@ panvk_cmd_init_render_state(struct panvk_cmd_buffer *cmdbuf,
 
    cmdbuf->state.gfx.render.flags = pRenderingInfo->flags;
 
-   /* Resuming from a suspended pass, the state should be unchanged. */
-   if (cmdbuf->state.gfx.render.flags & VK_RENDERING_RESUMING_BIT)
-      return;
-
    cmdbuf->state.gfx.render.dirty = true;
    memset(cmdbuf->state.gfx.render.fb.crc_valid, 0,
           sizeof(cmdbuf->state.gfx.render.fb.crc_valid));
@@ -1951,6 +1970,93 @@ preload_render_area_border(struct panvk_cmd_buffer *cmdbuf,
    }
 }
 
+void
+panvk_per_arch(cmd_inherit_render_state)(
+   struct panvk_cmd_buffer *cmdbuf,
+   const VkCommandBufferBeginInfo *pBeginInfo)
+{
+   if (cmdbuf->vk.level != VK_COMMAND_BUFFER_LEVEL_SECONDARY ||
+       !(pBeginInfo->flags & VK_COMMAND_BUFFER_USAGE_RENDER_PASS_CONTINUE_BIT))
+      return;
+
+   assert(pBeginInfo->pInheritanceInfo);
+   char gcbiar_data[VK_GCBIARR_DATA_SIZE(MAX_RTS)];
+   const VkRenderingInfo *resume_info =
+      vk_get_command_buffer_inheritance_as_rendering_resume(cmdbuf->vk.level,
+                                                            pBeginInfo,
+                                                            gcbiar_data);
+   if (resume_info) {
+      panvk_cmd_init_render_state(cmdbuf, resume_info);
+      return;
+   }
+
+   const VkCommandBufferInheritanceRenderingInfo *inheritance_info =
+      vk_get_command_buffer_inheritance_rendering_info(cmdbuf->vk.level,
+                                                       pBeginInfo);
+   assert(inheritance_info);
+   struct panvk_device *dev = to_panvk_device(cmdbuf->vk.base.device);
+   struct panvk_physical_device *phys_dev =
+      to_panvk_physical_device(dev->vk.physical);
+   struct pan_fb_info *fbinfo = &cmdbuf->state.gfx.render.fb.info;
+
+   cmdbuf->state.gfx.render.flags = inheritance_info->flags;
+
+   cmdbuf->state.gfx.render.dirty = true;
+   memset(cmdbuf->state.gfx.render.fb.crc_valid, 0,
+          sizeof(cmdbuf->state.gfx.render.fb.crc_valid));
+   memset(&cmdbuf->state.gfx.render.color_attachments, 0,
+          sizeof(cmdbuf->state.gfx.render.color_attachments));
+   memset(&cmdbuf->state.gfx.render.z_attachment, 0,
+          sizeof(cmdbuf->state.gfx.render.z_attachment));
+   memset(&cmdbuf->state.gfx.render.s_attachment, 0,
+          sizeof(cmdbuf->state.gfx.render.s_attachment));
+   cmdbuf->state.gfx.render.bound_attachments = 0;
+
+   cmdbuf->state.gfx.render.layer_count = 0;
+   *fbinfo = (struct pan_fb_info){
+      .tile_buf_budget = panfrost_query_optimal_tib_size(phys_dev->model),
+      .nr_samples = 1,
+      .rt_count = inheritance_info->colorAttachmentCount,
+   };
+
+   assert(inheritance_info->colorAttachmentCount <= ARRAY_SIZE(fbinfo->rts));
+
+   for (uint32_t i = 0; i < inheritance_info->colorAttachmentCount; i++) {
+      cmdbuf->state.gfx.render.bound_attachments |=
+         MESA_VK_RP_ATTACHMENT_COLOR_BIT(i);
+      cmdbuf->state.gfx.render.color_attachments.fmts[i] =
+         inheritance_info->pColorAttachmentFormats[i];
+      cmdbuf->state.gfx.render.color_attachments.samples[i] =
+         inheritance_info->rasterizationSamples;
+   }
+
+   if (inheritance_info->depthAttachmentFormat) {
+      cmdbuf->state.gfx.render.bound_attachments |=
+         MESA_VK_RP_ATTACHMENT_DEPTH_BIT;
+      cmdbuf->state.gfx.render.z_attachment.fmt =
+         inheritance_info->depthAttachmentFormat;
+   }
+
+   if (inheritance_info->stencilAttachmentFormat) {
+      cmdbuf->state.gfx.render.bound_attachments |=
+         MESA_VK_RP_ATTACHMENT_STENCIL_BIT;
+      cmdbuf->state.gfx.render.s_attachment.fmt =
+         inheritance_info->stencilAttachmentFormat;
+   }
+
+   const VkRenderingAttachmentLocationInfoKHR att_loc_info_default = {
+      .sType = VK_STRUCTURE_TYPE_RENDERING_ATTACHMENT_LOCATION_INFO_KHR,
+      .colorAttachmentCount = inheritance_info->colorAttachmentCount,
+   };
+   const VkRenderingAttachmentLocationInfoKHR *att_loc_info =
+      vk_get_command_buffer_rendering_attachment_location_info(
+         cmdbuf->vk.level, pBeginInfo);
+   if (att_loc_info == NULL)
+      att_loc_info = &att_loc_info_default;
+
+   vk_cmd_set_rendering_attachment_locations(&cmdbuf->vk, att_loc_info);
+}
+
 VKAPI_ATTR void VKAPI_CALL
 panvk_per_arch(CmdBeginRendering)(VkCommandBuffer commandBuffer,
                                   const VkRenderingInfo *pRenderingInfo)
@@ -1958,9 +2064,13 @@ panvk_per_arch(CmdBeginRendering)(VkCommandBuffer commandBuffer,
    VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer);
    struct panvk_cmd_graphics_state *state = &cmdbuf->state.gfx;
 
-   panvk_cmd_begin_rendering_init_state(cmdbuf, pRenderingInfo);
+   bool resuming = pRenderingInfo->flags & VK_RENDERING_RESUMING_BIT;
 
-   bool resuming = state->render.flags & VK_RENDERING_RESUMING_BIT;
+   /* When resuming from a suspended pass, the state should be unchanged. */
+   if (resuming)
+      state->render.flags = pRenderingInfo->flags;
+   else
+      panvk_cmd_init_render_state(cmdbuf, pRenderingInfo);
 
    /* If we're not resuming, the FBD should be NULL. */
    assert(!state->render.fbds.gpu || resuming);
diff --git a/src/panfrost/vulkan/panvk_vX_device.c b/src/panfrost/vulkan/panvk_vX_device.c
index b178fc9a3e0..920c032d27b 100644
--- a/src/panfrost/vulkan/panvk_vX_device.c
+++ b/src/panfrost/vulkan/panvk_vX_device.c
@@ -193,7 +193,13 @@ panvk_per_arch(create_device)(struct panvk_physical_device *physical_device,
    vk_device_dispatch_table_from_entrypoints(
       &device->cmd_dispatch, &vk_common_device_entrypoints, false);
 
-   result = vk_device_init(&device->vk, &physical_device->vk, &dispatch_table,
+   /* vkCmdExecuteCommands is currently only implemented on v10+. The panvk
+    * implementation will not run if the vk_cmd_enqueue_unless_primary
+    * entrypoint is present in the dispatch table.
+    */
+   result = vk_device_init(&device->vk, &physical_device->vk,
+                           PAN_ARCH <= 9 ?
+                           &dispatch_table : &device->cmd_dispatch,
                            pCreateInfo, pAllocator);
    if (result != VK_SUCCESS)
       goto err_free_dev;
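
For context, a minimal application-side sketch (not part of the change) of the path this patch enables: a secondary command buffer recorded with VK_COMMAND_BUFFER_USAGE_RENDER_PASS_CONTINUE_BIT and a dynamic-rendering inheritance chain, then replayed with vkCmdExecuteCommands inside a render pass begun with VK_RENDERING_CONTENTS_SECONDARY_COMMAND_BUFFERS_BIT. The handles and parameters (color_format, color_view, extent, the elided draw calls) are placeholders, not taken from the patch.

#include <vulkan/vulkan.h>

static void
record_with_secondary(VkCommandBuffer primary, VkCommandBuffer secondary,
                      VkFormat color_format, VkImageView color_view,
                      VkExtent2D extent)
{
   /* Inheritance chain the driver consumes in cmd_inherit_render_state()
    * via vk_get_command_buffer_inheritance_rendering_info(). */
   const VkCommandBufferInheritanceRenderingInfo inherit_rendering = {
      .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_INHERITANCE_RENDERING_INFO,
      .colorAttachmentCount = 1,
      .pColorAttachmentFormats = &color_format,
      .rasterizationSamples = VK_SAMPLE_COUNT_1_BIT,
   };
   const VkCommandBufferInheritanceInfo inherit = {
      .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_INHERITANCE_INFO,
      .pNext = &inherit_rendering,
   };
   const VkCommandBufferBeginInfo sec_begin = {
      .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO,
      .flags = VK_COMMAND_BUFFER_USAGE_RENDER_PASS_CONTINUE_BIT |
               VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT,
      .pInheritanceInfo = &inherit,
   };

   vkBeginCommandBuffer(secondary, &sec_begin);
   /* ... vkCmdBindPipeline/vkCmdDraw recorded here ... */
   vkEndCommandBuffer(secondary);

   const VkCommandBufferBeginInfo prim_begin = {
      .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO,
      .flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT,
   };
   const VkRenderingAttachmentInfo color_att = {
      .sType = VK_STRUCTURE_TYPE_RENDERING_ATTACHMENT_INFO,
      .imageView = color_view,
      .imageLayout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
      .loadOp = VK_ATTACHMENT_LOAD_OP_CLEAR,
      .storeOp = VK_ATTACHMENT_STORE_OP_STORE,
   };
   const VkRenderingInfo rendering = {
      .sType = VK_STRUCTURE_TYPE_RENDERING_INFO,
      /* Required when the pass contents come from secondaries. */
      .flags = VK_RENDERING_CONTENTS_SECONDARY_COMMAND_BUFFERS_BIT,
      .renderArea = { .extent = extent },
      .layerCount = 1,
      .colorAttachmentCount = 1,
      .pColorAttachments = &color_att,
   };

   vkBeginCommandBuffer(primary, &prim_begin);
   vkCmdBeginRendering(primary, &rendering);
   vkCmdExecuteCommands(primary, 1, &secondary);
   vkCmdEndRendering(primary);
   vkEndCommandBuffer(primary);
}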