From fab9558ab81b1c0aac6ff3cfe09961b0e9a3dc7d Mon Sep 17 00:00:00 2001 From: Marc Alcala Prieto Date: Wed, 11 Feb 2026 15:49:39 +0100 Subject: [PATCH] panvk: Handle provoking vertex and simultaneous reuse on v14 The provoking vertex bit in RUN_FRAGMENT2 is located in a register instead of a descriptor stored in memory. That means we don't need to patch memory, resulting in a much leaner implementation compared to RUN_FRAGMENT. Also, implement the simultaneous reuse copy path with the corresponding tiler pointer patching. Reviewed-by: Lars-Ivar Hesselberg Simonsen --- src/panfrost/vulkan/csf/panvk_vX_cmd_draw.c | 36 +++++++++++++++++++-- src/panfrost/vulkan/panvk_cmd_draw.h | 5 ++- src/panfrost/vulkan/panvk_vX_device.c | 6 ++-- 3 files changed, 38 insertions(+), 9 deletions(-) diff --git a/src/panfrost/vulkan/csf/panvk_vX_cmd_draw.c b/src/panfrost/vulkan/csf/panvk_vX_cmd_draw.c index f8ce561acb2..7d430591aa6 100644 --- a/src/panfrost/vulkan/csf/panvk_vX_cmd_draw.c +++ b/src/panfrost/vulkan/csf/panvk_vX_cmd_draw.c @@ -51,6 +51,7 @@ #include "vk_render_pass.h" #include "poly/geometry.h" +#if PAN_ARCH < 14 static enum cs_reg_perm provoking_vertex_fn_reg_perm_cb(struct cs_builder *b, unsigned reg) { @@ -202,6 +203,7 @@ panvk_per_arch(device_draw_context_cleanup)(struct panvk_device *dev) panvk_priv_bo_unref(dev->draw_ctx->fns_bo); vk_free(&dev->vk.alloc, dev->draw_ctx); } +#endif /* PAN_ARCH < 14 */ static void emit_vs_attrib(struct panvk_cmd_buffer *cmdbuf, @@ -1382,9 +1384,6 @@ get_fb_descs(struct panvk_cmd_buffer *cmdbuf) cs_wait_slot(b, SB_ID(LS)); #endif /* PAN_ARCH >= 14 */ - bool unset_provoking_vertex = - cmdbuf->state.gfx.render.first_provoking_vertex == U_TRISTATE_UNSET; - if (copy_fbds) { struct cs_index cur_tiler = cs_reg64(b, PANVK_CS_REG_TILER_DESC_PTR); #if PAN_ARCH >= 14 @@ -1418,10 +1417,27 @@ get_fb_descs(struct panvk_cmd_buffer *cmdbuf) * framebuffer size is aligned on 64-bytes. */ assert(fbd_sz == ALIGN_POT(fbd_sz, 64)); +#if PAN_ARCH >= 14 + for (uint32_t fbd_off = 0; fbd_off < fbd_sz; fbd_off += 64) { + cs_load_to(b, cs_scratch_reg_tuple(b, 0, 16), src_fbd_ptr, + BITFIELD_MASK(16), fbd_off); + + /* Patch the Tiler pointer. */ + if (fbd_off == 0) + cs_add64(b, cs_scratch_reg64(b, 0), cur_tiler, 0); + + cs_store(b, cs_scratch_reg_tuple(b, 0, 16), dst_fbd_ptr, + BITFIELD_MASK(16), fbd_off); + } +#else + bool unset_provoking_vertex = + cmdbuf->state.gfx.render.first_provoking_vertex == U_TRISTATE_UNSET; for (uint32_t fbd_off = 0; fbd_off < fbd_sz; fbd_off += 64) { if (fbd_off == 0) { cs_load_to(b, cs_scratch_reg_tuple(b, 0, 14), src_fbd_ptr, BITFIELD_MASK(14), fbd_off); + + /* Patch the Tiler pointer. */ cs_add64(b, cs_scratch_reg64(b, 14), cur_tiler, 0); /* If we don't know what provoking vertex mode the @@ -1441,6 +1457,7 @@ get_fb_descs(struct panvk_cmd_buffer *cmdbuf) cs_store(b, cs_scratch_reg_tuple(b, 0, 16), dst_fbd_ptr, BITFIELD_MASK(16), fbd_off); } +#endif /* Finish stores to pass_dst_fbd_ptr. */ cs_flush_stores(b); @@ -1486,6 +1503,7 @@ get_fb_descs(struct panvk_cmd_buffer *cmdbuf) cmdbuf->state.gfx.render.tiler); } +#if PAN_ARCH < 14 /* If we don't know what provoking vertex mode the application wants yet, * leave space to patch it later */ if (cmdbuf->state.gfx.render.first_provoking_vertex == U_TRISTATE_UNSET) { @@ -1507,6 +1525,7 @@ get_fb_descs(struct panvk_cmd_buffer *cmdbuf) cs_maybe(b, &cmdbuf->state.gfx.render.maybe_set_fbds_provoking_vertex) cs_call(b, addr_reg, length_reg); } +#endif } return VK_SUCCESS; @@ -3433,6 +3452,17 @@ cs_emit_static_fragment_state(struct cs_builder *b, } cs_move32_to(b, cs_sr_reg32(b, FRAGMENT, FLAGS_1), flags1.opaque[0]); + /* If we don't know what provoking vertex mode the application wants yet, + * leave space to patch it later */ + if (cmdbuf->state.gfx.render.first_provoking_vertex == U_TRISTATE_UNSET) { + cs_maybe(b, &cmdbuf->state.gfx.render.maybe_set_fbds_provoking_vertex) + { + /* provoking_vertex flag is bit 14 of Fragment Flags 1. */ + cs_add32(b, cs_sr_reg32(b, FRAGMENT, FLAGS_1), + cs_sr_reg32(b, FRAGMENT, FLAGS_1), -(1 << 14)); + } + } + /* Leave the remaining RUN_FRAGMENT2 staging registers as zero. */ } #endif /* PAN_ARCH >= 14 */ diff --git a/src/panfrost/vulkan/panvk_cmd_draw.h b/src/panfrost/vulkan/panvk_cmd_draw.h index 8de69cfdb42..7c11787fd44 100644 --- a/src/panfrost/vulkan/panvk_cmd_draw.h +++ b/src/panfrost/vulkan/panvk_cmd_draw.h @@ -243,7 +243,7 @@ struct panvk_cmd_graphics_state { } \ } while (0) -#if PAN_ARCH >= 10 +#if PAN_ARCH >= 10 && PAN_ARCH < 14 struct panvk_device_draw_context { struct panvk_priv_bo *fns_bo; uint64_t fn_set_fbds_provoking_vertex_stride; @@ -376,8 +376,7 @@ cached_fs_required(ASSERTED const struct panvk_cmd_graphics_state *state, gfx_state_set_dirty(__cmdbuf, FS_PUSH_UNIFORMS); \ } while (0) - -#if PAN_ARCH >= 10 +#if PAN_ARCH >= 10 && PAN_ARCH < 14 VkResult panvk_per_arch(device_draw_context_init)(struct panvk_device *dev); diff --git a/src/panfrost/vulkan/panvk_vX_device.c b/src/panfrost/vulkan/panvk_vX_device.c index c32d2f279e8..93b8a8e21af 100644 --- a/src/panfrost/vulkan/panvk_vX_device.c +++ b/src/panfrost/vulkan/panvk_vX_device.c @@ -550,7 +550,7 @@ panvk_per_arch(create_device)(struct panvk_physical_device *physical_device, goto err_free_precomp; } -#if PAN_ARCH >= 10 +#if PAN_ARCH >= 10 && PAN_ARCH < 14 result = panvk_per_arch(device_draw_context_init)(device); if (result != VK_SUCCESS) goto err_free_mem_cache; @@ -616,7 +616,7 @@ err_finish_queues: panvk_meta_cleanup(device); err_free_draw_ctx: -#if PAN_ARCH >= 10 +#if PAN_ARCH >= 10 && PAN_ARCH < 14 panvk_per_arch(device_draw_context_cleanup)(device); err_free_mem_cache: #endif @@ -679,7 +679,7 @@ panvk_per_arch(destroy_device)(struct panvk_device *device, } panvk_precomp_cleanup(device); -#if PAN_ARCH >= 10 +#if PAN_ARCH >= 10 && PAN_ARCH < 14 panvk_per_arch(device_draw_context_cleanup)(device); #endif panvk_meta_cleanup(device);