From b9eed7a0d4e4872dc5e0df2c8475092658ec7452 Mon Sep 17 00:00:00 2001 From: Boris Brezillon Date: Fri, 8 Nov 2024 18:00:27 +0100 Subject: [PATCH] panvk: Cache the fs_required() result get_fs() (and thus fs_required()) is called enough times during a draw that it makes sense to cache the value in the graphics state instead of calling fs_required() repeatedly. Signed-off-by: Boris Brezillon Reviewed-by: Lars-Ivar Hesselberg Simonsen Part-of: --- src/panfrost/vulkan/csf/panvk_vX_cmd_draw.c | 10 ++++++++++ src/panfrost/vulkan/jm/panvk_vX_cmd_draw.c | 8 +++++++- src/panfrost/vulkan/panvk_cmd_draw.h | 16 ++++++++++++++-- 3 files changed, 31 insertions(+), 3 deletions(-) diff --git a/src/panfrost/vulkan/csf/panvk_vX_cmd_draw.c b/src/panfrost/vulkan/csf/panvk_vX_cmd_draw.c index e262cfed191..c22ae9a0d4f 100644 --- a/src/panfrost/vulkan/csf/panvk_vX_cmd_draw.c +++ b/src/panfrost/vulkan/csf/panvk_vX_cmd_draw.c @@ -1386,6 +1386,11 @@ panvk_cmd_draw(struct panvk_cmd_buffer *cmdbuf, struct panvk_draw_info *draw) if (!panvk_priv_mem_dev_addr(vs->spds.pos_points)) return; + /* Needs to be done before get_fs() is called because it depends on + * fs.required being initialized. */ + cmdbuf->state.gfx.fs.required = + fs_required(&cmdbuf->state.gfx, &cmdbuf->vk.dynamic_graphics_state); + result = prepare_draw(cmdbuf, draw); if (result != VK_SUCCESS) return; @@ -1510,6 +1515,11 @@ panvk_cmd_draw_indirect(struct panvk_cmd_buffer *cmdbuf, if (!panvk_priv_mem_dev_addr(vs->spds.pos_points)) return; + /* Needs to be done before get_fs() is called because it depends on + * fs.required being initialized. */ + cmdbuf->state.gfx.fs.required = + fs_required(&cmdbuf->state.gfx, &cmdbuf->vk.dynamic_graphics_state); + /* Layered indirect draw (VK_EXT_shader_viewport_index_layer) needs * additional changes. We allow layer_count == 0 because that happens * when mixing dynamic rendering and secondary command buffers. Once diff --git a/src/panfrost/vulkan/jm/panvk_vX_cmd_draw.c b/src/panfrost/vulkan/jm/panvk_vX_cmd_draw.c index 1d51db15192..930da0d8134 100644 --- a/src/panfrost/vulkan/jm/panvk_vX_cmd_draw.c +++ b/src/panfrost/vulkan/jm/panvk_vX_cmd_draw.c @@ -1187,7 +1187,6 @@ panvk_cmd_draw(struct panvk_cmd_buffer *cmdbuf, struct panvk_draw_info *draw) { struct panvk_batch *batch = cmdbuf->cur_batch; const struct panvk_shader *vs = cmdbuf->state.gfx.vs.shader; - const struct panvk_shader *fs = get_fs(cmdbuf); struct panvk_shader_desc_state *vs_desc_state = &cmdbuf->state.gfx.vs.desc; struct panvk_shader_desc_state *fs_desc_state = &cmdbuf->state.gfx.fs.desc; struct panvk_descriptor_state *desc_state = &cmdbuf->state.gfx.desc_state; @@ -1201,6 +1200,13 @@ panvk_cmd_draw(struct panvk_cmd_buffer *cmdbuf, struct panvk_draw_info *draw) if (!panvk_priv_mem_dev_addr(vs->rsd)) return; + /* Needs to be done before get_fs() is called because it depends on + * fs.required being initialized. */ + cmdbuf->state.gfx.fs.required = + fs_required(&cmdbuf->state.gfx, &cmdbuf->vk.dynamic_graphics_state); + + const struct panvk_shader *fs = get_fs(cmdbuf); + /* There are only 16 bits in the descriptor for the job ID. Each job has a * pilot shader dealing with descriptor copies, and we need one * pair per draw. diff --git a/src/panfrost/vulkan/panvk_cmd_draw.h b/src/panfrost/vulkan/panvk_cmd_draw.h index 18364c97fec..ace89963f85 100644 --- a/src/panfrost/vulkan/panvk_cmd_draw.h +++ b/src/panfrost/vulkan/panvk_cmd_draw.h @@ -100,6 +100,7 @@ struct panvk_cmd_graphics_state { struct { const struct panvk_shader *shader; struct panvk_shader_desc_state desc; + bool required; #if PAN_ARCH <= 7 mali_ptr rsd; #endif @@ -219,9 +220,20 @@ fs_required(const struct panvk_cmd_graphics_state *state, return (fs_info->fs.writes_depth || fs_info->fs.writes_stencil); } +static inline bool +cached_fs_required(ASSERTED const struct panvk_cmd_graphics_state *state, + ASSERTED const struct vk_dynamic_graphics_state *dyn_state, + bool cached_value) +{ + /* Make sure the cached value was properly initialized. */ + assert(fs_required(state, dyn_state) == cached_value); + return cached_value; +} + #define get_fs(__cmdbuf) \ - (fs_required(&(__cmdbuf)->state.gfx, \ - &(__cmdbuf)->vk.dynamic_graphics_state) \ + (cached_fs_required(&(__cmdbuf)->state.gfx, \ + &(__cmdbuf)->vk.dynamic_graphics_state, \ + (__cmdbuf)->state.gfx.fs.required) \ ? (__cmdbuf)->state.gfx.fs.shader \ : NULL)