From 4fa1eddb4cff78dcb04dc03425d77bd30f9daa3d Mon Sep 17 00:00:00 2001 From: Lionel Landwerlin Date: Thu, 15 Jan 2026 12:17:55 +0200 Subject: [PATCH] anv: optimize binding table flushing Split emission from pointers programming. That way we can switch back & forth between blorp & applications shaders and never emit binding tables, we just reprogram the pointers. Signed-off-by: Lionel Landwerlin Reviewed-by: Alyssa Rosenzweig Part-of: --- src/intel/vulkan/anv_cmd_buffer.c | 6 ++- src/intel/vulkan/anv_private.h | 10 ++++ src/intel/vulkan/genX_blorp_exec.c | 3 +- src/intel/vulkan/genX_cmd_compute.c | 20 ++++---- src/intel/vulkan/genX_cmd_draw.c | 72 +++++++++++++++------------ src/intel/vulkan/genX_simple_shader.c | 2 +- 6 files changed, 67 insertions(+), 46 deletions(-) diff --git a/src/intel/vulkan/anv_cmd_buffer.c b/src/intel/vulkan/anv_cmd_buffer.c index d52fe59d4e2..5aa4805f9ad 100644 --- a/src/intel/vulkan/anv_cmd_buffer.c +++ b/src/intel/vulkan/anv_cmd_buffer.c @@ -436,7 +436,8 @@ set_dirty_for_bind_map(struct anv_cmd_buffer *cmd_buffer, const struct anv_pipeline_bind_map *map) { assert(stage < ARRAY_SIZE(cmd_buffer->state.surface_sha1s)); - if (mem_update(cmd_buffer->state.surface_sha1s[stage], + if (map->surface_count > 0 && + mem_update(cmd_buffer->state.surface_sha1s[stage], map->surface_sha1, sizeof(map->surface_sha1))) { anv_cmd_buffer_dirty_descriptors(cmd_buffer, mesa_to_vk_shader_stage(stage), @@ -444,7 +445,8 @@ set_dirty_for_bind_map(struct anv_cmd_buffer *cmd_buffer, } assert(stage < ARRAY_SIZE(cmd_buffer->state.sampler_sha1s)); - if (mem_update(cmd_buffer->state.sampler_sha1s[stage], + if (map->sampler_count > 0 && + mem_update(cmd_buffer->state.sampler_sha1s[stage], map->sampler_sha1, sizeof(map->sampler_sha1))) { anv_cmd_buffer_dirty_descriptors(cmd_buffer, mesa_to_vk_shader_stage(stage), diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h index 3582c519da7..783f52643f6 100644 --- a/src/intel/vulkan/anv_private.h +++ 
b/src/intel/vulkan/anv_private.h @@ -4739,7 +4739,17 @@ struct anv_cmd_state { enum anv_query_bits clear_bits; } queries; + /** Tracks whether 3DSTATE_BINDING_TABLE_POINTERS_* instructions need + * to be emitted + */ + VkShaderStageFlags descriptors_pointers_dirty; + /** Tracks whether binding tables need to be emitted (leads to + * 3DSTATE_BINDING_TABLE_POINTERS_* emission once flushed) + */ VkShaderStageFlags descriptors_dirty; + /** Tracks push descriptor set emission (leads to + * 3DSTATE_BINDING_TABLE_POINTERS_* emission once flushed) + */ VkShaderStageFlags push_descriptors_dirty; /** Tracks the 3DSTATE_CONSTANT_* instruction that needs to be reemitted */ VkShaderStageFlags push_constants_dirty; diff --git a/src/intel/vulkan/genX_blorp_exec.c b/src/intel/vulkan/genX_blorp_exec.c index 56dcea84b88..256076611af 100644 --- a/src/intel/vulkan/genX_blorp_exec.c +++ b/src/intel/vulkan/genX_blorp_exec.c @@ -438,7 +438,7 @@ blorp_exec_on_render(struct blorp_batch *batch, cmd_buffer->state.gfx.vb_dirty = ~0; cmd_buffer->state.gfx.dirty |= dirty; if (blorp_uses_bti_rt_writes(batch, params)) - cmd_buffer->state.descriptors_dirty |= VK_SHADER_STAGE_ALL_GRAPHICS; + cmd_buffer->state.descriptors_pointers_dirty |= VK_SHADER_STAGE_ALL_GRAPHICS; cmd_buffer->state.push_constants_dirty |= VK_SHADER_STAGE_ALL_GRAPHICS; } @@ -461,6 +461,7 @@ blorp_exec_on_compute(struct blorp_batch *batch, anv_cmd_buffer_dirty_descriptors(cmd_buffer, VK_SHADER_STAGE_COMPUTE_BIT, "blorp compute"); + cmd_buffer->state.descriptors_pointers_dirty |= VK_SHADER_STAGE_COMPUTE_BIT; cmd_buffer->state.push_constants_dirty |= VK_SHADER_STAGE_COMPUTE_BIT; cmd_buffer->state.compute.pipeline_dirty = true; diff --git a/src/intel/vulkan/genX_cmd_compute.c b/src/intel/vulkan/genX_cmd_compute.c index 9d7432a8375..1c9625fab23 100644 --- a/src/intel/vulkan/genX_cmd_compute.c +++ b/src/intel/vulkan/genX_cmd_compute.c @@ -193,16 +193,18 @@ cmd_buffer_flush_compute_state(struct anv_cmd_buffer *cmd_buffer) 
&cmd_buffer->state.compute.base), "dirty compute descriptor"); - if ((cmd_buffer->state.descriptors_dirty & VK_SHADER_STAGE_COMPUTE_BIT) || - cmd_buffer->state.compute.pipeline_dirty) { - genX(cmd_buffer_flush_descriptor_sets)( - cmd_buffer, - &cmd_buffer->state.compute.base, - VK_SHADER_STAGE_COMPUTE_BIT, - (const struct anv_shader **)&comp_state->shader, 1); + if (cmd_buffer->state.descriptors_dirty & VK_SHADER_STAGE_COMPUTE_BIT) { + cmd_buffer->state.descriptors_pointers_dirty |= + genX(cmd_buffer_flush_descriptor_sets)( + cmd_buffer, + &cmd_buffer->state.compute.base, + VK_SHADER_STAGE_COMPUTE_BIT, + (const struct anv_shader **)&comp_state->shader, 1); cmd_buffer->state.descriptors_dirty &= ~VK_SHADER_STAGE_COMPUTE_BIT; - + } #if GFX_VERx10 < 125 + if ((cmd_buffer->state.descriptors_pointers_dirty & VK_SHADER_STAGE_COMPUTE_BIT) || + cmd_buffer->state.compute.pipeline_dirty) { uint32_t iface_desc_data_dw[GENX(INTERFACE_DESCRIPTOR_DATA_length)]; struct GENX(INTERFACE_DESCRIPTOR_DATA) desc = { .BindingTablePointer = @@ -224,8 +226,8 @@ cmd_buffer_flush_compute_state(struct anv_cmd_buffer *cmd_buffer) mid.InterfaceDescriptorTotalLength = size; mid.InterfaceDescriptorDataStartAddress = state.offset; } -#endif } +#endif if (cmd_buffer->state.push_constants_dirty & VK_SHADER_STAGE_COMPUTE_BIT) { diff --git a/src/intel/vulkan/genX_cmd_draw.c b/src/intel/vulkan/genX_cmd_draw.c index fc74e432717..a2a3bb1635a 100644 --- a/src/intel/vulkan/genX_cmd_draw.c +++ b/src/intel/vulkan/genX_cmd_draw.c @@ -632,8 +632,6 @@ cmd_buffer_flush_mesh_inline_data(struct anv_cmd_buffer *cmd_buffer, data.InlineData[ANV_INLINE_PARAM_MESH_PROVOKING_VERTEX / 4] = gfx->dyn_state.mesh_provoking_vertex; } } - - cmd_buffer->state.push_constants_dirty &= ~dirty_stages; } #endif @@ -833,19 +831,30 @@ cmd_buffer_flush_gfx_state(struct anv_cmd_buffer *cmd_buffer) const bool any_dynamic_state_dirty = vk_dynamic_graphics_state_any_dirty(dyn); - uint32_t descriptors_dirty = 
cmd_buffer->state.descriptors_dirty & - gfx->active_stages; - descriptors_dirty |= + cmd_buffer->state.descriptors_dirty |= genX(cmd_buffer_flush_push_descriptors)(cmd_buffer, &cmd_buffer->state.gfx.base); - if (!cmd_buffer->state.gfx.dirty && !descriptors_dirty && + uint32_t descriptors_dirty = cmd_buffer->state.descriptors_dirty & + gfx->active_stages; + cmd_buffer->state.descriptors_pointers_dirty |= + descriptors_dirty & VK_SHADER_STAGE_ALL_GRAPHICS; + uint32_t descriptors_pointers_dirty = + cmd_buffer->state.descriptors_pointers_dirty & gfx->active_stages; + + /* Because we're pushing UBOs, we have to push whenever either descriptors + * or push constants is dirty. + */ + uint32_t push_constants_dirty = + (cmd_buffer->state.push_constants_dirty | + cmd_buffer->state.descriptors_dirty) & gfx->active_stages; + + if (!cmd_buffer->state.gfx.dirty && + !descriptors_dirty && + !descriptors_pointers_dirty && !any_dynamic_state_dirty && - ((cmd_buffer->state.push_constants_dirty & - (VK_SHADER_STAGE_ALL_GRAPHICS | - VK_SHADER_STAGE_TASK_BIT_EXT | - VK_SHADER_STAGE_MESH_BIT_EXT)) == 0)) + !push_constants_dirty) return; if (cmd_buffer->state.gfx.dirty & ANV_CMD_DIRTY_XFB_ENABLE) { @@ -955,42 +964,37 @@ cmd_buffer_flush_gfx_state(struct anv_cmd_buffer *cmd_buffer) * emitting push constants, on SKL+ we have to emit the corresponding * 3DSTATE_BINDING_TABLE_POINTER_* for the push constants to take effect. 
*/ - uint32_t dirty = 0; if (descriptors_dirty) { - dirty = genX(cmd_buffer_flush_descriptor_sets)( - cmd_buffer, - &cmd_buffer->state.gfx.base, - descriptors_dirty, - (const struct anv_shader **)gfx->shaders, - ARRAY_SIZE(gfx->shaders)); - cmd_buffer->state.descriptors_dirty &= ~dirty; + descriptors_pointers_dirty |= + genX(cmd_buffer_flush_descriptor_sets)( + cmd_buffer, + &cmd_buffer->state.gfx.base, + descriptors_dirty, + (const struct anv_shader **)gfx->shaders, + ARRAY_SIZE(gfx->shaders)) & VK_SHADER_STAGE_ALL_GRAPHICS; } - if (dirty || cmd_buffer->state.push_constants_dirty) { - /* Because we're pushing UBOs, we have to push whenever either - * descriptors or push constants is dirty. - */ - VkShaderStageFlags push_stages = dirty | - (cmd_buffer->state.push_constants_dirty & gfx->active_stages); + push_constants_dirty = (cmd_buffer->state.push_constants_dirty | + cmd_buffer->state.descriptors_dirty) & gfx->active_stages; + if (push_constants_dirty) { #if INTEL_NEEDS_WA_1604061319 /* Testing shows that all the 3DSTATE_CONSTANT_XS need to be emitted if * any stage has 3DSTATE_CONSTANT_XS emitted. 
*/ - push_stages |= gfx->active_stages; + push_constants_dirty |= gfx->active_stages; #endif - cmd_buffer_flush_gfx_push_constants(cmd_buffer, - push_stages & VK_SHADER_STAGE_ALL_GRAPHICS); + cmd_buffer_flush_gfx_push_constants( + cmd_buffer, + push_constants_dirty & VK_SHADER_STAGE_ALL_GRAPHICS); #if GFX_VERx10 >= 125 cmd_buffer_flush_mesh_inline_data( - cmd_buffer, push_stages & (VK_SHADER_STAGE_TASK_BIT_EXT | - VK_SHADER_STAGE_MESH_BIT_EXT)); + cmd_buffer, push_constants_dirty & (VK_SHADER_STAGE_TASK_BIT_EXT | + VK_SHADER_STAGE_MESH_BIT_EXT)); #endif } - if (dirty & VK_SHADER_STAGE_ALL_GRAPHICS) { - cmd_buffer_emit_descriptor_pointers(cmd_buffer, - dirty & VK_SHADER_STAGE_ALL_GRAPHICS); - } + if (descriptors_pointers_dirty) + cmd_buffer_emit_descriptor_pointers(cmd_buffer, descriptors_pointers_dirty); #if GFX_VER >= 20 if (cmd_buffer->state.gfx.dirty & ANV_CMD_DIRTY_INDIRECT_DATA_STRIDE) { @@ -1002,6 +1006,8 @@ cmd_buffer_flush_gfx_state(struct anv_cmd_buffer *cmd_buffer) } #endif + cmd_buffer->state.descriptors_dirty &= ~descriptors_dirty; + cmd_buffer->state.descriptors_pointers_dirty &= ~descriptors_pointers_dirty; cmd_buffer->state.gfx.dirty = 0; } diff --git a/src/intel/vulkan/genX_simple_shader.c b/src/intel/vulkan/genX_simple_shader.c index a6522aa3ba3..804c304c0db 100644 --- a/src/intel/vulkan/genX_simple_shader.c +++ b/src/intel/vulkan/genX_simple_shader.c @@ -383,7 +383,7 @@ genX(emit_simpler_shader_init_fragment)(struct anv_simple_shader *state) ANV_CMD_DIRTY_OCCLUSION_QUERY_ACTIVE | ANV_CMD_DIRTY_INDEX_TYPE); state->cmd_buffer->state.push_constants_dirty |= VK_SHADER_STAGE_FRAGMENT_BIT; - state->cmd_buffer->state.gfx.push_constant_stages = VK_SHADER_STAGE_FRAGMENT_BIT; + state->cmd_buffer->state.gfx.push_constant_stages |= VK_SHADER_STAGE_FRAGMENT_BIT; } static void