diff --git a/src/panfrost/vulkan/csf/panvk_cmd_buffer.h b/src/panfrost/vulkan/csf/panvk_cmd_buffer.h
index 2ba40533794..42e3968d26a 100644
--- a/src/panfrost/vulkan/csf/panvk_cmd_buffer.h
+++ b/src/panfrost/vulkan/csf/panvk_cmd_buffer.h
@@ -86,7 +86,7 @@ get_fbd_size(bool has_zs_ext, uint32_t rt_count)
    (PANVK_IR_##_pass##_PASS * sizeof(uint64_t)))
 
 struct panvk_cs_occlusion_query {
-   uint64_t next;
+   struct cs_single_link_list_node node;
    uint64_t syncobj;
 };
 
@@ -101,7 +101,7 @@ struct panvk_cs_subqueue_context {
       struct panvk_cs_desc_ringbuf desc_ringbuf;
       uint64_t tiler_heap;
       uint64_t geom_buf;
-      uint64_t oq_chain;
+      struct cs_single_link_list oq_chain;
    } render;
    struct {
       uint32_t counter;
diff --git a/src/panfrost/vulkan/csf/panvk_vX_cmd_draw.c b/src/panfrost/vulkan/csf/panvk_vX_cmd_draw.c
index 2d28171f104..bf17b390c72 100644
--- a/src/panfrost/vulkan/csf/panvk_vX_cmd_draw.c
+++ b/src/panfrost/vulkan/csf/panvk_vX_cmd_draw.c
@@ -1793,7 +1793,6 @@ wrap_prev_oq(struct panvk_cmd_buffer *cmdbuf)
    if (!last_syncobj)
       return VK_SUCCESS;
 
-   uint64_t prev_oq_node = cmdbuf->state.gfx.render.oq.chain;
    struct pan_ptr new_oq_node = panvk_cmd_alloc_dev_mem(
       cmdbuf, desc, sizeof(struct panvk_cs_occlusion_query), 8);
 
@@ -1805,39 +1804,19 @@ wrap_prev_oq(struct panvk_cmd_buffer *cmdbuf)
    struct panvk_cs_occlusion_query *oq = new_oq_node.cpu;
    *oq = (struct panvk_cs_occlusion_query){
+      .node = {.next = 0},
       .syncobj = last_syncobj,
-      .next = prev_oq_node,
    };
 
-   /* If we already had an OQ in the chain, we don't need to initialize the
-    * oq_chain field in the subqueue ctx. */
-   if (prev_oq_node)
-      return VK_SUCCESS;
-
-   /* If we're a secondary cmdbuf inside a render pass, we let the primary
-    * cmdbuf link the OQ chain. */
-   if (cmdbuf->flags & VK_COMMAND_BUFFER_USAGE_RENDER_PASS_CONTINUE_BIT)
-      return VK_SUCCESS;
-
    struct cs_builder *b = panvk_get_cs_builder(cmdbuf, PANVK_SUBQUEUE_FRAGMENT);
-   struct cs_index oq_node_reg = cs_scratch_reg64(b, 0);
+   struct cs_index new_node_ptr = cs_scratch_reg64(b, 0);
 
-   cs_move64_to(b, oq_node_reg, new_oq_node.gpu);
-
-   /* If we're resuming, we need to link with the previous oq_chain, if any. */
-   if (cmdbuf->state.gfx.render.flags & VK_RENDERING_RESUMING_BIT) {
-      struct cs_index prev_oq_node_reg = cs_scratch_reg64(b, 2);
-      cs_load64_to(
-         b, prev_oq_node_reg, cs_subqueue_ctx_reg(b),
-         offsetof(struct panvk_cs_subqueue_context, render.oq_chain));
-      cs_store64(b, prev_oq_node_reg, oq_node_reg,
-                 offsetof(struct panvk_cs_occlusion_query, next));
-   }
-
-   cs_store64(b, oq_node_reg, cs_subqueue_ctx_reg(b),
-              offsetof(struct panvk_cs_subqueue_context, render.oq_chain));
-   cs_flush_stores(b);
+   cs_move64_to(b, new_node_ptr, new_oq_node.gpu);
+   cs_single_link_list_add_tail(
+      b, cs_subqueue_ctx_reg(b),
+      offsetof(struct panvk_cs_subqueue_context, render.oq_chain), new_node_ptr,
+      offsetof(struct panvk_cs_occlusion_query, node),
+      cs_scratch_reg_tuple(b, 10, 4));
 
    return VK_SUCCESS;
 }
@@ -3110,7 +3089,6 @@ issue_fragment_jobs(struct panvk_cmd_buffer *cmdbuf)
    struct cs_index tiler_count = cs_reg32(b, 47);
    struct cs_index oq_chain = cs_scratch_reg64(b, 10);
    struct cs_index oq_chain_lo = cs_scratch_reg32(b, 10);
-   struct cs_index oq_chain_hi = cs_scratch_reg32(b, 11);
    struct cs_index oq_syncobj = cs_scratch_reg64(b, 12);
 
    cs_move64_to(b, add_val, 1);
@@ -3177,18 +3155,12 @@ issue_fragment_jobs(struct panvk_cmd_buffer *cmdbuf)
       cs_store64(b, oq_syncobj, cs_subqueue_ctx_reg(b),
                  offsetof(struct panvk_cs_subqueue_context, render.oq_chain));
-      cs_while(b, MALI_CS_CONDITION_ALWAYS, cs_undef()) {
+      cs_single_link_list_for_each_from(b, oq_chain,
+                                        struct panvk_cs_occlusion_query, node) {
          cs_load64_to(b, oq_syncobj, oq_chain,
                       offsetof(struct panvk_cs_occlusion_query, syncobj));
-         cs_load64_to(b, oq_chain, oq_chain,
-                      offsetof(struct panvk_cs_occlusion_query, next));
         cs_sync32_set(b, true, MALI_CS_SYNC_SCOPE_CSG, add_val_lo, oq_syncobj,
                       cs_defer(SB_MASK(DEFERRED_FLUSH), SB_ID(DEFERRED_SYNC)));
-         cs_if(b, MALI_CS_CONDITION_NEQUAL, oq_chain_lo)
-            cs_continue(b);
-         cs_if(b, MALI_CS_CONDITION_NEQUAL, oq_chain_hi)
-            cs_continue(b);
-         cs_break(b);
       }
    }
@@ -3244,19 +3216,13 @@ issue_fragment_jobs(struct panvk_cmd_buffer *cmdbuf)
       cs_store64( \
          b, oq_syncobj, cs_subqueue_ctx_reg(b), \
          offsetof(struct panvk_cs_subqueue_context, render.oq_chain)); \
-      cs_while(b, MALI_CS_CONDITION_ALWAYS, cs_undef()) { \
+      cs_single_link_list_for_each_from( \
+         b, oq_chain, struct panvk_cs_occlusion_query, node) { \
         cs_load64_to(b, oq_syncobj, oq_chain, \
                      offsetof(struct panvk_cs_occlusion_query, syncobj)); \
-         cs_load64_to(b, oq_chain, oq_chain, \
-                      offsetof(struct panvk_cs_occlusion_query, next)); \
         cs_sync32_set( \
            b, true, MALI_CS_SYNC_SCOPE_CSG, add_val_lo, oq_syncobj, \
            cs_defer(SB_MASK(DEFERRED_FLUSH), SB_ID(DEFERRED_SYNC))); \
-         cs_if(b, MALI_CS_CONDITION_NEQUAL, oq_chain_lo) \
-            cs_continue(b); \
-         cs_if(b, MALI_CS_CONDITION_NEQUAL, oq_chain_hi) \
-            cs_continue(b); \
-         cs_break(b); \
      } \
   } \
   cs_sync64_add(b, true, MALI_CS_SYNC_SCOPE_CSG, add_val, sync_addr, \
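Note for reviewers: cs_single_link_list_add_tail() and cs_single_link_list_for_each_from()
emit CS instructions that operate on GPU virtual addresses, but the list discipline they
implement is an ordinary singly-linked list with an embedded node and, presumably, a
head/tail pair so that appends stay O(1). The CPU-side sketch below is illustrative only;
the sketch_* names are made up for this note and the actual layout and store-flush
semantics live in the cs_builder helpers, not here.

/* Illustrative CPU-side sketch only: the real helpers emit CS loads/stores on
 * GPU VAs; this just shows the list discipline the patch relies on. */
#include <stddef.h>

struct sketch_node {
   struct sketch_node *next; /* NULL-terminated, like .node = {.next = 0} above */
};

struct sketch_list {
   struct sketch_node *head;
   struct sketch_node *tail; /* tracked so that appending is O(1) */
};

/* Append a node at the tail, starting a new chain if the list is empty. */
static void
sketch_list_add_tail(struct sketch_list *list, struct sketch_node *node)
{
   node->next = NULL;
   if (list->tail)
      list->tail->next = node;
   else
      list->head = node;
   list->tail = node;
}

/* Walk from an arbitrary starting node, the way the for_each_from loop walks
 * the oq_chain register in issue_fragment_jobs(). */
#define sketch_list_for_each_from(n) for (; (n) != NULL; (n) = (n)->next)

Appending at the tail is what lets wrap_prev_oq() drop the old special cases (first node
in the chain, secondary command buffers inside a render pass, render-pass resume) and
unconditionally link the new node, which is all the first panvk_vX_cmd_draw.c hunk does now.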