panvk: Handle provoking vertex and simultaneous reuse on v14

The provoking vertex bit in RUN_FRAGMENT2 is located in a register
instead of a descriptor stored in memory. That means we don't need to
patch memory, resulting in a much leaner implementation compared to
RUN_FRAGMENT.

Also, implement the simultaneous reuse copy path on v14: the FBD is
copied in 64-byte chunks, with the tiler pointer patched in the first
chunk.

Reviewed-by: Lars-Ivar Hesselberg Simonsen <lars-ivar.simonsen@arm.com>
This commit is contained in:
Marc Alcala Prieto 2026-02-11 15:49:39 +01:00
parent d90f075c3c
commit 47f127685a
3 changed files with 38 additions and 9 deletions

View file

@ -51,6 +51,7 @@
#include "vk_render_pass.h"
#include "poly/geometry.h"
#if PAN_ARCH < 14
static enum cs_reg_perm
provoking_vertex_fn_reg_perm_cb(struct cs_builder *b, unsigned reg)
{
@ -202,6 +203,7 @@ panvk_per_arch(device_draw_context_cleanup)(struct panvk_device *dev)
panvk_priv_bo_unref(dev->draw_ctx->fns_bo);
vk_free(&dev->vk.alloc, dev->draw_ctx);
}
#endif /* PAN_ARCH < 14 */
static void
emit_vs_attrib(struct panvk_cmd_buffer *cmdbuf,
@ -1382,9 +1384,6 @@ get_fb_descs(struct panvk_cmd_buffer *cmdbuf)
cs_wait_slot(b, SB_ID(LS));
#endif /* PAN_ARCH >= 14 */
bool unset_provoking_vertex =
cmdbuf->state.gfx.render.first_provoking_vertex == U_TRISTATE_UNSET;
if (copy_fbds) {
struct cs_index cur_tiler = cs_reg64(b, PANVK_CS_REG_TILER_DESC_PTR);
struct cs_index dst_fbd_ptr = cs_sr_reg64(b, FRAGMENT, FBD_POINTER);
@ -1414,10 +1413,27 @@ get_fb_descs(struct panvk_cmd_buffer *cmdbuf)
* framebuffer size is aligned on 64-bytes. */
assert(fbd_sz == ALIGN_POT(fbd_sz, 64));
#if PAN_ARCH >= 14
for (uint32_t fbd_off = 0; fbd_off < fbd_sz; fbd_off += 64) {
cs_load_to(b, cs_scratch_reg_tuple(b, 0, 16), src_fbd_ptr,
BITFIELD_MASK(16), fbd_off);
/* Patch the Tiler pointer. */
if (fbd_off == 0)
cs_add64(b, cs_scratch_reg64(b, 0), cur_tiler, 0);
cs_store(b, cs_scratch_reg_tuple(b, 0, 16), dst_fbd_ptr,
BITFIELD_MASK(16), fbd_off);
}
#else
bool unset_provoking_vertex =
cmdbuf->state.gfx.render.first_provoking_vertex == U_TRISTATE_UNSET;
for (uint32_t fbd_off = 0; fbd_off < fbd_sz; fbd_off += 64) {
if (fbd_off == 0) {
cs_load_to(b, cs_scratch_reg_tuple(b, 0, 14), src_fbd_ptr,
BITFIELD_MASK(14), fbd_off);
/* Patch the Tiler pointer. */
cs_add64(b, cs_scratch_reg64(b, 14), cur_tiler, 0);
/* If we don't know what provoking vertex mode the
@ -1437,6 +1453,7 @@ get_fb_descs(struct panvk_cmd_buffer *cmdbuf)
cs_store(b, cs_scratch_reg_tuple(b, 0, 16), dst_fbd_ptr,
BITFIELD_MASK(16), fbd_off);
}
#endif
/* Finish stores to pass_dst_fbd_ptr. */
cs_flush_stores(b);
@ -1477,6 +1494,7 @@ get_fb_descs(struct panvk_cmd_buffer *cmdbuf)
cmdbuf->state.gfx.render.tiler);
}
#if PAN_ARCH < 14
/* If we don't know what provoking vertex mode the application wants yet,
* leave space to patch it later */
if (cmdbuf->state.gfx.render.first_provoking_vertex == U_TRISTATE_UNSET) {
@ -1498,6 +1516,7 @@ get_fb_descs(struct panvk_cmd_buffer *cmdbuf)
cs_maybe(b, &cmdbuf->state.gfx.render.maybe_set_fbds_provoking_vertex)
cs_call(b, addr_reg, length_reg);
}
#endif
}
return VK_SUCCESS;
@ -3426,6 +3445,17 @@ cs_emit_static_fragment_state(struct cs_builder *b,
}
cs_move32_to(b, cs_sr_reg32(b, FRAGMENT, FLAGS_1), flags1.opaque[0]);
/* If we don't know what provoking vertex mode the application wants yet,
* leave space to patch it later */
if (cmdbuf->state.gfx.render.first_provoking_vertex == U_TRISTATE_UNSET) {
cs_maybe(b, &cmdbuf->state.gfx.render.maybe_set_fbds_provoking_vertex)
{
/* provoking_vertex flag is bit 14 of Fragment Flags 1. */
cs_add32(b, cs_sr_reg32(b, FRAGMENT, FLAGS_1),
cs_sr_reg32(b, FRAGMENT, FLAGS_1), -(1 << 14));
}
}
/* Leave the remaining RUN_FRAGMENT2 staging registers as zero. */
}
#endif /* PAN_ARCH >= 14 */

View file

@ -243,7 +243,7 @@ struct panvk_cmd_graphics_state {
} \
} while (0)
#if PAN_ARCH >= 10
#if PAN_ARCH >= 10 && PAN_ARCH < 14
struct panvk_device_draw_context {
struct panvk_priv_bo *fns_bo;
uint64_t fn_set_fbds_provoking_vertex_stride;
@ -376,8 +376,7 @@ cached_fs_required(ASSERTED const struct panvk_cmd_graphics_state *state,
gfx_state_set_dirty(__cmdbuf, FS_PUSH_UNIFORMS); \
} while (0)
#if PAN_ARCH >= 10
#if PAN_ARCH >= 10 && PAN_ARCH < 14
VkResult
panvk_per_arch(device_draw_context_init)(struct panvk_device *dev);

View file

@ -550,7 +550,7 @@ panvk_per_arch(create_device)(struct panvk_physical_device *physical_device,
goto err_free_precomp;
}
#if PAN_ARCH >= 10
#if PAN_ARCH >= 10 && PAN_ARCH < 14
result = panvk_per_arch(device_draw_context_init)(device);
if (result != VK_SUCCESS)
goto err_free_mem_cache;
@ -616,7 +616,7 @@ err_finish_queues:
panvk_meta_cleanup(device);
err_free_draw_ctx:
#if PAN_ARCH >= 10
#if PAN_ARCH >= 10 && PAN_ARCH < 14
panvk_per_arch(device_draw_context_cleanup)(device);
err_free_mem_cache:
#endif
@ -679,7 +679,7 @@ panvk_per_arch(destroy_device)(struct panvk_device *device,
}
panvk_precomp_cleanup(device);
#if PAN_ARCH >= 10
#if PAN_ARCH >= 10 && PAN_ARCH < 14
panvk_per_arch(device_draw_context_cleanup)(device);
#endif
panvk_meta_cleanup(device);