anv: optimize binding table flushing

Split binding table emission from binding table pointer programming.

That way we can switch back and forth between blorp and application
shaders without re-emitting binding tables; we just reprogram the pointers.

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Reviewed-by: Alyssa Rosenzweig <alyssa.rosenzweig@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/39405>
This commit is contained in:
Lionel Landwerlin 2026-01-15 12:17:55 +02:00 committed by Marge Bot
parent 3a9d075e0c
commit 4fa1eddb4c
6 changed files with 67 additions and 46 deletions

View file

@ -436,7 +436,8 @@ set_dirty_for_bind_map(struct anv_cmd_buffer *cmd_buffer,
const struct anv_pipeline_bind_map *map)
{
assert(stage < ARRAY_SIZE(cmd_buffer->state.surface_sha1s));
if (mem_update(cmd_buffer->state.surface_sha1s[stage],
if (map->surface_count > 0 &&
mem_update(cmd_buffer->state.surface_sha1s[stage],
map->surface_sha1, sizeof(map->surface_sha1))) {
anv_cmd_buffer_dirty_descriptors(cmd_buffer,
mesa_to_vk_shader_stage(stage),
@ -444,7 +445,8 @@ set_dirty_for_bind_map(struct anv_cmd_buffer *cmd_buffer,
}
assert(stage < ARRAY_SIZE(cmd_buffer->state.sampler_sha1s));
if (mem_update(cmd_buffer->state.sampler_sha1s[stage],
if (map->sampler_count > 0 &&
mem_update(cmd_buffer->state.sampler_sha1s[stage],
map->sampler_sha1, sizeof(map->sampler_sha1))) {
anv_cmd_buffer_dirty_descriptors(cmd_buffer,
mesa_to_vk_shader_stage(stage),

View file

@ -4739,7 +4739,17 @@ struct anv_cmd_state {
enum anv_query_bits clear_bits;
} queries;
/** Tracks whether 3DSTATE_BINDING_TABLE_POINTERS_* instructions need to be
* emitted
*/
VkShaderStageFlags descriptors_pointers_dirty;
/** Tracks whether binding tables need to be emitted (leads to
* 3DSTATE_BINDING_TABLE_POINTERS_* emission once flushed)
*/
VkShaderStageFlags descriptors_dirty;
/** Tracks push descriptor set emission (leads to
* 3DSTATE_BINDING_TABLE_POINTERS_* emission once flushed)
*/
VkShaderStageFlags push_descriptors_dirty;
/** Tracks the 3DSTATE_CONSTANT_* instruction that needs to be reemitted */
VkShaderStageFlags push_constants_dirty;

View file

@ -438,7 +438,7 @@ blorp_exec_on_render(struct blorp_batch *batch,
cmd_buffer->state.gfx.vb_dirty = ~0;
cmd_buffer->state.gfx.dirty |= dirty;
if (blorp_uses_bti_rt_writes(batch, params))
cmd_buffer->state.descriptors_dirty |= VK_SHADER_STAGE_ALL_GRAPHICS;
cmd_buffer->state.descriptors_pointers_dirty |= VK_SHADER_STAGE_ALL_GRAPHICS;
cmd_buffer->state.push_constants_dirty |= VK_SHADER_STAGE_ALL_GRAPHICS;
}
@ -461,6 +461,7 @@ blorp_exec_on_compute(struct blorp_batch *batch,
anv_cmd_buffer_dirty_descriptors(cmd_buffer,
VK_SHADER_STAGE_COMPUTE_BIT,
"blorp compute");
cmd_buffer->state.descriptors_pointers_dirty |= VK_SHADER_STAGE_COMPUTE_BIT;
cmd_buffer->state.push_constants_dirty |= VK_SHADER_STAGE_COMPUTE_BIT;
cmd_buffer->state.compute.pipeline_dirty = true;

View file

@ -193,16 +193,18 @@ cmd_buffer_flush_compute_state(struct anv_cmd_buffer *cmd_buffer)
&cmd_buffer->state.compute.base),
"dirty compute descriptor");
if ((cmd_buffer->state.descriptors_dirty & VK_SHADER_STAGE_COMPUTE_BIT) ||
cmd_buffer->state.compute.pipeline_dirty) {
genX(cmd_buffer_flush_descriptor_sets)(
cmd_buffer,
&cmd_buffer->state.compute.base,
VK_SHADER_STAGE_COMPUTE_BIT,
(const struct anv_shader **)&comp_state->shader, 1);
if (cmd_buffer->state.descriptors_dirty & VK_SHADER_STAGE_COMPUTE_BIT) {
cmd_buffer->state.descriptors_pointers_dirty |=
genX(cmd_buffer_flush_descriptor_sets)(
cmd_buffer,
&cmd_buffer->state.compute.base,
VK_SHADER_STAGE_COMPUTE_BIT,
(const struct anv_shader **)&comp_state->shader, 1);
cmd_buffer->state.descriptors_dirty &= ~VK_SHADER_STAGE_COMPUTE_BIT;
}
#if GFX_VERx10 < 125
if ((cmd_buffer->state.descriptors_pointers_dirty & VK_SHADER_STAGE_COMPUTE_BIT) ||
cmd_buffer->state.compute.pipeline_dirty) {
uint32_t iface_desc_data_dw[GENX(INTERFACE_DESCRIPTOR_DATA_length)];
struct GENX(INTERFACE_DESCRIPTOR_DATA) desc = {
.BindingTablePointer =
@ -224,8 +226,8 @@ cmd_buffer_flush_compute_state(struct anv_cmd_buffer *cmd_buffer)
mid.InterfaceDescriptorTotalLength = size;
mid.InterfaceDescriptorDataStartAddress = state.offset;
}
#endif
}
#endif
if (cmd_buffer->state.push_constants_dirty & VK_SHADER_STAGE_COMPUTE_BIT) {

View file

@ -632,8 +632,6 @@ cmd_buffer_flush_mesh_inline_data(struct anv_cmd_buffer *cmd_buffer,
data.InlineData[ANV_INLINE_PARAM_MESH_PROVOKING_VERTEX / 4] = gfx->dyn_state.mesh_provoking_vertex;
}
}
cmd_buffer->state.push_constants_dirty &= ~dirty_stages;
}
#endif
@ -833,19 +831,30 @@ cmd_buffer_flush_gfx_state(struct anv_cmd_buffer *cmd_buffer)
const bool any_dynamic_state_dirty =
vk_dynamic_graphics_state_any_dirty(dyn);
uint32_t descriptors_dirty = cmd_buffer->state.descriptors_dirty &
gfx->active_stages;
descriptors_dirty |=
cmd_buffer->state.descriptors_dirty |=
genX(cmd_buffer_flush_push_descriptors)(cmd_buffer,
&cmd_buffer->state.gfx.base);
if (!cmd_buffer->state.gfx.dirty && !descriptors_dirty &&
uint32_t descriptors_dirty = cmd_buffer->state.descriptors_dirty &
gfx->active_stages;
cmd_buffer->state.descriptors_pointers_dirty |=
descriptors_dirty & VK_SHADER_STAGE_ALL_GRAPHICS;
uint32_t descriptors_pointers_dirty =
cmd_buffer->state.descriptors_pointers_dirty & gfx->active_stages;
/* Because we're pushing UBOs, we have to push whenever either descriptors
* or push constants is dirty.
*/
uint32_t push_constants_dirty =
(cmd_buffer->state.push_constants_dirty |
cmd_buffer->state.descriptors_dirty) & gfx->active_stages;
if (!cmd_buffer->state.gfx.dirty &&
!descriptors_dirty &&
!descriptors_pointers_dirty &&
!any_dynamic_state_dirty &&
((cmd_buffer->state.push_constants_dirty &
(VK_SHADER_STAGE_ALL_GRAPHICS |
VK_SHADER_STAGE_TASK_BIT_EXT |
VK_SHADER_STAGE_MESH_BIT_EXT)) == 0))
!push_constants_dirty)
return;
if (cmd_buffer->state.gfx.dirty & ANV_CMD_DIRTY_XFB_ENABLE) {
@ -955,42 +964,37 @@ cmd_buffer_flush_gfx_state(struct anv_cmd_buffer *cmd_buffer)
* emitting push constants, on SKL+ we have to emit the corresponding
* 3DSTATE_BINDING_TABLE_POINTERS_* for the push constants to take effect.
*/
uint32_t dirty = 0;
if (descriptors_dirty) {
dirty = genX(cmd_buffer_flush_descriptor_sets)(
cmd_buffer,
&cmd_buffer->state.gfx.base,
descriptors_dirty,
(const struct anv_shader **)gfx->shaders,
ARRAY_SIZE(gfx->shaders));
cmd_buffer->state.descriptors_dirty &= ~dirty;
descriptors_pointers_dirty |=
genX(cmd_buffer_flush_descriptor_sets)(
cmd_buffer,
&cmd_buffer->state.gfx.base,
descriptors_dirty,
(const struct anv_shader **)gfx->shaders,
ARRAY_SIZE(gfx->shaders)) & VK_SHADER_STAGE_ALL_GRAPHICS;
}
if (dirty || cmd_buffer->state.push_constants_dirty) {
/* Because we're pushing UBOs, we have to push whenever either
* descriptors or push constants is dirty.
*/
VkShaderStageFlags push_stages = dirty |
(cmd_buffer->state.push_constants_dirty & gfx->active_stages);
push_constants_dirty = (cmd_buffer->state.push_constants_dirty |
cmd_buffer->state.descriptors_dirty) & gfx->active_stages;
if (push_constants_dirty) {
#if INTEL_NEEDS_WA_1604061319
/* Testing shows that all the 3DSTATE_CONSTANT_XS need to be emitted if
* any stage has 3DSTATE_CONSTANT_XS emitted.
*/
push_stages |= gfx->active_stages;
push_constants_dirty |= gfx->active_stages;
#endif
cmd_buffer_flush_gfx_push_constants(cmd_buffer,
push_stages & VK_SHADER_STAGE_ALL_GRAPHICS);
cmd_buffer_flush_gfx_push_constants(
cmd_buffer,
push_constants_dirty & VK_SHADER_STAGE_ALL_GRAPHICS);
#if GFX_VERx10 >= 125
cmd_buffer_flush_mesh_inline_data(
cmd_buffer, push_stages & (VK_SHADER_STAGE_TASK_BIT_EXT |
VK_SHADER_STAGE_MESH_BIT_EXT));
cmd_buffer, push_constants_dirty & (VK_SHADER_STAGE_TASK_BIT_EXT |
VK_SHADER_STAGE_MESH_BIT_EXT));
#endif
}
if (dirty & VK_SHADER_STAGE_ALL_GRAPHICS) {
cmd_buffer_emit_descriptor_pointers(cmd_buffer,
dirty & VK_SHADER_STAGE_ALL_GRAPHICS);
}
if (descriptors_pointers_dirty)
cmd_buffer_emit_descriptor_pointers(cmd_buffer, descriptors_pointers_dirty);
#if GFX_VER >= 20
if (cmd_buffer->state.gfx.dirty & ANV_CMD_DIRTY_INDIRECT_DATA_STRIDE) {
@ -1002,6 +1006,8 @@ cmd_buffer_flush_gfx_state(struct anv_cmd_buffer *cmd_buffer)
}
#endif
cmd_buffer->state.descriptors_dirty &= ~descriptors_dirty;
cmd_buffer->state.descriptors_pointers_dirty &= ~descriptors_pointers_dirty;
cmd_buffer->state.gfx.dirty = 0;
}

View file

@ -383,7 +383,7 @@ genX(emit_simpler_shader_init_fragment)(struct anv_simple_shader *state)
ANV_CMD_DIRTY_OCCLUSION_QUERY_ACTIVE |
ANV_CMD_DIRTY_INDEX_TYPE);
state->cmd_buffer->state.push_constants_dirty |= VK_SHADER_STAGE_FRAGMENT_BIT;
state->cmd_buffer->state.gfx.push_constant_stages = VK_SHADER_STAGE_FRAGMENT_BIT;
state->cmd_buffer->state.gfx.push_constant_stages |= VK_SHADER_STAGE_FRAGMENT_BIT;
}
static void