From 443ddace70695bd490d959d783e22ad49c69ccb2 Mon Sep 17 00:00:00 2001
From: Olivia Lee
Date: Mon, 17 Nov 2025 13:29:13 -0800
Subject: [PATCH] panvk/csf: merge v10 and v11 paths in issue_fragment_jobs

This is quite a lot of logic to duplicate verbatim just to deal with the
slightly different synchronization.

Signed-off-by: Olivia Lee
Reviewed-by: Lars-Ivar Hesselberg Simonsen
Acked-by: Boris Brezillon
Part-of:
---
 src/panfrost/vulkan/csf/panvk_vX_cmd_draw.c | 115 +++++++-------
 1 file changed, 37 insertions(+), 78 deletions(-)

diff --git a/src/panfrost/vulkan/csf/panvk_vX_cmd_draw.c b/src/panfrost/vulkan/csf/panvk_vX_cmd_draw.c
index e2bde9abb56..9de45f79c25 100644
--- a/src/panfrost/vulkan/csf/panvk_vX_cmd_draw.c
+++ b/src/panfrost/vulkan/csf/panvk_vX_cmd_draw.c
@@ -3278,66 +3278,6 @@ issue_fragment_jobs(struct panvk_cmd_buffer *cmdbuf)
                 offsetof(struct panvk_cs_subqueue_context, syncobjs));
    cs_add64(b, sync_addr, sync_addr,
             PANVK_SUBQUEUE_FRAGMENT * sizeof(struct panvk_cs_sync64));
-
-   if (td_count == 1) {
-      cs_load_to(b, completed, cur_tiler, BITFIELD_MASK(4), 40);
-      cs_finish_fragment(b, true, completed_top, completed_bottom,
-                         cs_defer_indirect());
-   } else if (td_count > 1) {
-      cs_while(b, MALI_CS_CONDITION_GREATER, tiler_count) {
-         cs_load_to(b, completed, cur_tiler, BITFIELD_MASK(4), 40);
-         cs_finish_fragment(b, false, completed_top, completed_bottom,
-                            cs_defer_indirect());
-         cs_update_frag_ctx(b)
-            cs_add64(b, cur_tiler, cur_tiler, pan_size(TILER_CONTEXT));
-         cs_add32(b, tiler_count, tiler_count, -1);
-      }
-      cs_frag_end(b, cs_defer_indirect());
-   }
-
-   if (free_render_descs) {
-      cs_sync32_add(b, true, MALI_CS_SYNC_SCOPE_CSG, release_sz,
-                    ringbuf_sync_addr, cs_defer_indirect());
-   }
-
-   if (has_oq_chain) {
-      struct cs_index flush_id = oq_chain_lo;
-      cs_move32_to(b, flush_id, 0);
-
-      /* FLUSH_CACHE2 is part of the deferred group so we need to
-       * temporarily set DEFERRED_FLUSH here to use the right scoreboard in
-       * indirect mode */
-      cs_set_state_imm32(b, MALI_CS_SET_STATE_TYPE_SB_SEL_DEFERRED,
-                         SB_ID(DEFERRED_FLUSH));
-      cs_flush_caches(b, MALI_CS_FLUSH_MODE_CLEAN, MALI_CS_FLUSH_MODE_CLEAN,
-                      MALI_CS_OTHER_FLUSH_MODE_NONE, flush_id,
-                      cs_defer_indirect());
-      cs_set_state_imm32(b, MALI_CS_SET_STATE_TYPE_SB_SEL_DEFERRED,
-                         SB_ID(DEFERRED_SYNC));
-
-      cs_load64_to(b, oq_chain, cs_subqueue_ctx_reg(b),
-                   offsetof(struct panvk_cs_subqueue_context, render.oq_chain));
-
-      /* For WAR dependency on subqueue_context.render.oq_chain. */
-      cs_flush_loads(b);
-
-      /* We use oq_syncobj as a placeholder to reset the oq_chain. */
-      cs_move64_to(b, oq_syncobj, 0);
-      cs_store64(b, oq_syncobj, cs_subqueue_ctx_reg(b),
-                 offsetof(struct panvk_cs_subqueue_context, render.oq_chain));
-
-      cs_single_link_list_for_each_from(b, oq_chain,
-                                        struct panvk_cs_occlusion_query, node) {
-         cs_load64_to(b, oq_syncobj, oq_chain,
-                      offsetof(struct panvk_cs_occlusion_query, syncobj));
-         cs_sync32_set(b, true, MALI_CS_SYNC_SCOPE_CSG, add_val_lo, oq_syncobj,
-                       cs_defer(SB_MASK(DEFERRED_FLUSH), SB_ID(DEFERRED_SYNC)));
-      }
-   }
-
-   panvk_instr_sync64_add(cmdbuf, PANVK_SUBQUEUE_FRAGMENT, true,
-                          MALI_CS_SYNC_SCOPE_CSG, add_val, sync_addr,
-                          cs_defer_indirect());
 #else
    struct cs_index iter_sb = cs_scratch_reg32(b, 2);
    struct cs_index cmp_scratch = cs_scratch_reg32(b, 3);
@@ -3347,10 +3287,17 @@ issue_fragment_jobs(struct panvk_cmd_buffer *cmdbuf)
                 offsetof(struct panvk_cs_subqueue_context, syncobjs));
    cs_add64(b, sync_addr, sync_addr,
             PANVK_SUBQUEUE_FRAGMENT * sizeof(struct panvk_cs_sync64));
+#endif
 
+#if PAN_ARCH >= 11
+   {
+      const struct cs_async_op async = cs_defer_indirect();
+#else
    cs_match_iter_sb(b, x, iter_sb, cmp_scratch) {
       const struct cs_async_op async =
          cs_defer(SB_WAIT_ITER(x), SB_ID(DEFERRED_SYNC));
+#endif
+
       if (td_count == 1) {
          cs_load_to(b, completed, cur_tiler, BITFIELD_MASK(4), 40);
          cs_finish_fragment(b, true, completed_top, completed_bottom, async);
@@ -3365,41 +3312,53 @@ issue_fragment_jobs(struct panvk_cmd_buffer *cmdbuf)
          }
          cs_frag_end(b, async);
       }
+
       if (free_render_descs) {
          cs_sync32_add(b, true, MALI_CS_SYNC_SCOPE_CSG, release_sz,
                        ringbuf_sync_addr, async);
       }
+
       if (has_oq_chain) {
          struct cs_index flush_id = oq_chain_lo;
          cs_move32_to(b, flush_id, 0);
-         cs_flush_caches(b, MALI_CS_FLUSH_MODE_CLEAN,
-                         MALI_CS_FLUSH_MODE_CLEAN,
-                         MALI_CS_OTHER_FLUSH_MODE_NONE, flush_id,
-                         cs_defer(SB_WAIT_ITER(x), SB_ID(DEFERRED_FLUSH)));
-         cs_load64_to(
-            b, oq_chain, cs_subqueue_ctx_reg(b),
-            offsetof(struct panvk_cs_subqueue_context, render.oq_chain));
+
+#if PAN_ARCH >= 11
+         /* FLUSH_CACHE2 is part of the deferred group so we need to
+          * temporarily set DEFERRED_FLUSH here to use the right scoreboard in
+          * indirect mode */
+         cs_set_state_imm32(b, MALI_CS_SET_STATE_TYPE_SB_SEL_DEFERRED,
+                            SB_ID(DEFERRED_FLUSH));
+#endif
+         cs_flush_caches(b, MALI_CS_FLUSH_MODE_CLEAN, MALI_CS_FLUSH_MODE_CLEAN,
+                         MALI_CS_OTHER_FLUSH_MODE_NONE, flush_id, async);
+#if PAN_ARCH >= 11
+         cs_set_state_imm32(b, MALI_CS_SET_STATE_TYPE_SB_SEL_DEFERRED,
+                            SB_ID(DEFERRED_SYNC));
+#endif
+
+         cs_load64_to(b, oq_chain, cs_subqueue_ctx_reg(b),
+                      offsetof(struct panvk_cs_subqueue_context, render.oq_chain));
+
          /* For WAR dependency on subqueue_context.render.oq_chain. */
          cs_flush_loads(b);
+
         /* We use oq_syncobj as a placeholder to reset the oq_chain. */
         cs_move64_to(b, oq_syncobj, 0);
-         cs_store64(
-            b, oq_syncobj, cs_subqueue_ctx_reg(b),
-            offsetof(struct panvk_cs_subqueue_context, render.oq_chain));
-         cs_single_link_list_for_each_from(
-            b, oq_chain, struct panvk_cs_occlusion_query, node) {
+         cs_store64(b, oq_syncobj, cs_subqueue_ctx_reg(b),
+                    offsetof(struct panvk_cs_subqueue_context, render.oq_chain));
+
+         cs_single_link_list_for_each_from(b, oq_chain,
+                                           struct panvk_cs_occlusion_query, node) {
            cs_load64_to(b, oq_syncobj, oq_chain,
                         offsetof(struct panvk_cs_occlusion_query, syncobj));
-            cs_sync32_set(
-               b, true, MALI_CS_SYNC_SCOPE_CSG, add_val_lo, oq_syncobj,
-               cs_defer(SB_MASK(DEFERRED_FLUSH), SB_ID(DEFERRED_SYNC)));
+            cs_sync32_set(b, true, MALI_CS_SYNC_SCOPE_CSG, add_val_lo, oq_syncobj,
+                          cs_defer(SB_MASK(DEFERRED_FLUSH), SB_ID(DEFERRED_SYNC)));
         }
      }
+
      panvk_instr_sync64_add(cmdbuf, PANVK_SUBQUEUE_FRAGMENT, true,
-                             MALI_CS_SYNC_SCOPE_CSG, add_val, sync_addr,
-                             async);
+                             MALI_CS_SYNC_SCOPE_CSG, add_val, sync_addr, async);
   }
-#endif
 
    /* Update the ring buffer position. */
    if (free_render_descs) {
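
The trick that makes the merge possible, distilled into a standalone sketch:
only the construction of the deferred op differs per architecture, and the
v11 branch opens a plain block so that both preprocessor branches share the
same closing brace and the same body. Every name below (struct async_op,
defer_indirect, defer, emit_body) is a hypothetical stand-in, not the panvk
API:

#include <stdio.h>

/* Hypothetical stand-in for the driver's deferred-op descriptor. */
struct async_op {
   int wait_mask;
   int signal_slot;
};

static struct async_op defer_indirect(void)
{
   /* "v11" flavor: one indirect deferred op covers the whole body. */
   return (struct async_op){.wait_mask = -1, .signal_slot = 0};
}

static struct async_op defer(int wait_mask, int signal_slot)
{
   /* "v10" flavor: defer against an explicit scoreboard slot. */
   return (struct async_op){.wait_mask = wait_mask, .signal_slot = signal_slot};
}

static void emit_body(struct async_op async)
{
   /* Stands in for the once-duplicated fragment-completion logic. */
   printf("emit: wait_mask=%d signal_slot=%d\n", async.wait_mask,
          async.signal_slot);
}

int main(void)
{
#if PAN_ARCH >= 11
   {  /* plain block, so the shared closing brace below still pairs up */
      const struct async_op async = defer_indirect();
#else
   for (int x = 0; x < 4; x++) {  /* mirrors cs_match_iter_sb's iteration */
      const struct async_op async = defer(1 << x, 2);
#endif
      emit_body(async);  /* the shared body appears once in the source */
   }
   return 0;
}

Compile with -DPAN_ARCH=10 or -DPAN_ARCH=11 to get either branch; in both
cases the body exists only once, which is the point of the patch.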