anv: optimize binding table flushing

Split emission from pointers programming.

That way we can switch back & forth between blorp & application
shaders without ever re-emitting binding tables; we just reprogram the pointers.

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Reviewed-by: Alyssa Rosenzweig <alyssa.rosenzweig@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/39405>
This commit is contained in:
Lionel Landwerlin 2026-01-15 12:17:55 +02:00 committed by Marge Bot
parent 3a9d075e0c
commit 4fa1eddb4c
6 changed files with 67 additions and 46 deletions

View file

@ -436,7 +436,8 @@ set_dirty_for_bind_map(struct anv_cmd_buffer *cmd_buffer,
const struct anv_pipeline_bind_map *map) const struct anv_pipeline_bind_map *map)
{ {
assert(stage < ARRAY_SIZE(cmd_buffer->state.surface_sha1s)); assert(stage < ARRAY_SIZE(cmd_buffer->state.surface_sha1s));
if (mem_update(cmd_buffer->state.surface_sha1s[stage], if (map->surface_count > 0 &&
mem_update(cmd_buffer->state.surface_sha1s[stage],
map->surface_sha1, sizeof(map->surface_sha1))) { map->surface_sha1, sizeof(map->surface_sha1))) {
anv_cmd_buffer_dirty_descriptors(cmd_buffer, anv_cmd_buffer_dirty_descriptors(cmd_buffer,
mesa_to_vk_shader_stage(stage), mesa_to_vk_shader_stage(stage),
@ -444,7 +445,8 @@ set_dirty_for_bind_map(struct anv_cmd_buffer *cmd_buffer,
} }
assert(stage < ARRAY_SIZE(cmd_buffer->state.sampler_sha1s)); assert(stage < ARRAY_SIZE(cmd_buffer->state.sampler_sha1s));
if (mem_update(cmd_buffer->state.sampler_sha1s[stage], if (map->sampler_count > 0 &&
mem_update(cmd_buffer->state.sampler_sha1s[stage],
map->sampler_sha1, sizeof(map->sampler_sha1))) { map->sampler_sha1, sizeof(map->sampler_sha1))) {
anv_cmd_buffer_dirty_descriptors(cmd_buffer, anv_cmd_buffer_dirty_descriptors(cmd_buffer,
mesa_to_vk_shader_stage(stage), mesa_to_vk_shader_stage(stage),

View file

@ -4739,7 +4739,17 @@ struct anv_cmd_state {
enum anv_query_bits clear_bits; enum anv_query_bits clear_bits;
} queries; } queries;
/** Tracks whether 3DSTATE_BINDING_TABLE_POINTERS_* instructions need
 * to be emitted
 */
VkShaderStageFlags descriptors_pointers_dirty;
/** Tracks whether binding tables need to be emitted (leads to
 * 3DSTATE_BINDING_TABLE_POINTERS_* emission once flushed)
 */
VkShaderStageFlags descriptors_dirty; VkShaderStageFlags descriptors_dirty;
/** Tracks push descriptor set emission (leads to
* 3DSTATE_BINDING_TABLE_POINTERS_* emission once flushed)
*/
VkShaderStageFlags push_descriptors_dirty; VkShaderStageFlags push_descriptors_dirty;
/** Tracks the 3DSTATE_CONSTANT_* instruction that needs to be reemitted */ /** Tracks the 3DSTATE_CONSTANT_* instruction that needs to be reemitted */
VkShaderStageFlags push_constants_dirty; VkShaderStageFlags push_constants_dirty;

View file

@ -438,7 +438,7 @@ blorp_exec_on_render(struct blorp_batch *batch,
cmd_buffer->state.gfx.vb_dirty = ~0; cmd_buffer->state.gfx.vb_dirty = ~0;
cmd_buffer->state.gfx.dirty |= dirty; cmd_buffer->state.gfx.dirty |= dirty;
if (blorp_uses_bti_rt_writes(batch, params)) if (blorp_uses_bti_rt_writes(batch, params))
cmd_buffer->state.descriptors_dirty |= VK_SHADER_STAGE_ALL_GRAPHICS; cmd_buffer->state.descriptors_pointers_dirty |= VK_SHADER_STAGE_ALL_GRAPHICS;
cmd_buffer->state.push_constants_dirty |= VK_SHADER_STAGE_ALL_GRAPHICS; cmd_buffer->state.push_constants_dirty |= VK_SHADER_STAGE_ALL_GRAPHICS;
} }
@ -461,6 +461,7 @@ blorp_exec_on_compute(struct blorp_batch *batch,
anv_cmd_buffer_dirty_descriptors(cmd_buffer, anv_cmd_buffer_dirty_descriptors(cmd_buffer,
VK_SHADER_STAGE_COMPUTE_BIT, VK_SHADER_STAGE_COMPUTE_BIT,
"blorp compute"); "blorp compute");
cmd_buffer->state.descriptors_pointers_dirty |= VK_SHADER_STAGE_COMPUTE_BIT;
cmd_buffer->state.push_constants_dirty |= VK_SHADER_STAGE_COMPUTE_BIT; cmd_buffer->state.push_constants_dirty |= VK_SHADER_STAGE_COMPUTE_BIT;
cmd_buffer->state.compute.pipeline_dirty = true; cmd_buffer->state.compute.pipeline_dirty = true;

View file

@ -193,16 +193,18 @@ cmd_buffer_flush_compute_state(struct anv_cmd_buffer *cmd_buffer)
&cmd_buffer->state.compute.base), &cmd_buffer->state.compute.base),
"dirty compute descriptor"); "dirty compute descriptor");
if ((cmd_buffer->state.descriptors_dirty & VK_SHADER_STAGE_COMPUTE_BIT) || if (cmd_buffer->state.descriptors_dirty & VK_SHADER_STAGE_COMPUTE_BIT) {
cmd_buffer->state.compute.pipeline_dirty) { cmd_buffer->state.descriptors_pointers_dirty |=
genX(cmd_buffer_flush_descriptor_sets)( genX(cmd_buffer_flush_descriptor_sets)(
cmd_buffer, cmd_buffer,
&cmd_buffer->state.compute.base, &cmd_buffer->state.compute.base,
VK_SHADER_STAGE_COMPUTE_BIT, VK_SHADER_STAGE_COMPUTE_BIT,
(const struct anv_shader **)&comp_state->shader, 1); (const struct anv_shader **)&comp_state->shader, 1);
cmd_buffer->state.descriptors_dirty &= ~VK_SHADER_STAGE_COMPUTE_BIT; cmd_buffer->state.descriptors_dirty &= ~VK_SHADER_STAGE_COMPUTE_BIT;
}
#if GFX_VERx10 < 125 #if GFX_VERx10 < 125
if ((cmd_buffer->state.descriptors_pointers_dirty & VK_SHADER_STAGE_COMPUTE_BIT) ||
cmd_buffer->state.compute.pipeline_dirty) {
uint32_t iface_desc_data_dw[GENX(INTERFACE_DESCRIPTOR_DATA_length)]; uint32_t iface_desc_data_dw[GENX(INTERFACE_DESCRIPTOR_DATA_length)];
struct GENX(INTERFACE_DESCRIPTOR_DATA) desc = { struct GENX(INTERFACE_DESCRIPTOR_DATA) desc = {
.BindingTablePointer = .BindingTablePointer =
@ -224,8 +226,8 @@ cmd_buffer_flush_compute_state(struct anv_cmd_buffer *cmd_buffer)
mid.InterfaceDescriptorTotalLength = size; mid.InterfaceDescriptorTotalLength = size;
mid.InterfaceDescriptorDataStartAddress = state.offset; mid.InterfaceDescriptorDataStartAddress = state.offset;
} }
#endif
} }
#endif
if (cmd_buffer->state.push_constants_dirty & VK_SHADER_STAGE_COMPUTE_BIT) { if (cmd_buffer->state.push_constants_dirty & VK_SHADER_STAGE_COMPUTE_BIT) {

View file

@ -632,8 +632,6 @@ cmd_buffer_flush_mesh_inline_data(struct anv_cmd_buffer *cmd_buffer,
data.InlineData[ANV_INLINE_PARAM_MESH_PROVOKING_VERTEX / 4] = gfx->dyn_state.mesh_provoking_vertex; data.InlineData[ANV_INLINE_PARAM_MESH_PROVOKING_VERTEX / 4] = gfx->dyn_state.mesh_provoking_vertex;
} }
} }
cmd_buffer->state.push_constants_dirty &= ~dirty_stages;
} }
#endif #endif
@ -833,19 +831,30 @@ cmd_buffer_flush_gfx_state(struct anv_cmd_buffer *cmd_buffer)
const bool any_dynamic_state_dirty = const bool any_dynamic_state_dirty =
vk_dynamic_graphics_state_any_dirty(dyn); vk_dynamic_graphics_state_any_dirty(dyn);
uint32_t descriptors_dirty = cmd_buffer->state.descriptors_dirty &
gfx->active_stages;
descriptors_dirty |= cmd_buffer->state.descriptors_dirty |=
genX(cmd_buffer_flush_push_descriptors)(cmd_buffer, genX(cmd_buffer_flush_push_descriptors)(cmd_buffer,
&cmd_buffer->state.gfx.base); &cmd_buffer->state.gfx.base);
if (!cmd_buffer->state.gfx.dirty && !descriptors_dirty && uint32_t descriptors_dirty = cmd_buffer->state.descriptors_dirty &
gfx->active_stages;
cmd_buffer->state.descriptors_pointers_dirty |=
descriptors_dirty & VK_SHADER_STAGE_ALL_GRAPHICS;
uint32_t descriptors_pointers_dirty =
cmd_buffer->state.descriptors_pointers_dirty & gfx->active_stages;
/* Because we're pushing UBOs, we have to push whenever either descriptors
* or push constants is dirty.
*/
uint32_t push_constants_dirty =
(cmd_buffer->state.push_constants_dirty |
cmd_buffer->state.descriptors_dirty) & gfx->active_stages;
if (!cmd_buffer->state.gfx.dirty &&
!descriptors_dirty &&
!descriptors_pointers_dirty &&
!any_dynamic_state_dirty && !any_dynamic_state_dirty &&
((cmd_buffer->state.push_constants_dirty & !push_constants_dirty)
(VK_SHADER_STAGE_ALL_GRAPHICS |
VK_SHADER_STAGE_TASK_BIT_EXT |
VK_SHADER_STAGE_MESH_BIT_EXT)) == 0))
return; return;
if (cmd_buffer->state.gfx.dirty & ANV_CMD_DIRTY_XFB_ENABLE) { if (cmd_buffer->state.gfx.dirty & ANV_CMD_DIRTY_XFB_ENABLE) {
@ -955,42 +964,37 @@ cmd_buffer_flush_gfx_state(struct anv_cmd_buffer *cmd_buffer)
* emitting push constants, on SKL+ we have to emit the corresponding * emitting push constants, on SKL+ we have to emit the corresponding
* 3DSTATE_BINDING_TABLE_POINTER_* for the push constants to take effect. * 3DSTATE_BINDING_TABLE_POINTER_* for the push constants to take effect.
*/ */
uint32_t dirty = 0;
if (descriptors_dirty) { if (descriptors_dirty) {
dirty = genX(cmd_buffer_flush_descriptor_sets)( descriptors_pointers_dirty |=
cmd_buffer, genX(cmd_buffer_flush_descriptor_sets)(
&cmd_buffer->state.gfx.base, cmd_buffer,
descriptors_dirty, &cmd_buffer->state.gfx.base,
(const struct anv_shader **)gfx->shaders, descriptors_dirty,
ARRAY_SIZE(gfx->shaders)); (const struct anv_shader **)gfx->shaders,
cmd_buffer->state.descriptors_dirty &= ~dirty; ARRAY_SIZE(gfx->shaders)) & VK_SHADER_STAGE_ALL_GRAPHICS;
} }
if (dirty || cmd_buffer->state.push_constants_dirty) { push_constants_dirty = (cmd_buffer->state.push_constants_dirty |
/* Because we're pushing UBOs, we have to push whenever either cmd_buffer->state.descriptors_dirty) & gfx->active_stages;
* descriptors or push constants is dirty. if (push_constants_dirty) {
*/
VkShaderStageFlags push_stages = dirty |
(cmd_buffer->state.push_constants_dirty & gfx->active_stages);
#if INTEL_NEEDS_WA_1604061319 #if INTEL_NEEDS_WA_1604061319
/* Testing shows that all the 3DSTATE_CONSTANT_XS need to be emitted if /* Testing shows that all the 3DSTATE_CONSTANT_XS need to be emitted if
* any stage has 3DSTATE_CONSTANT_XS emitted. * any stage has 3DSTATE_CONSTANT_XS emitted.
*/ */
push_stages |= gfx->active_stages; push_constants_dirty |= gfx->active_stages;
#endif #endif
cmd_buffer_flush_gfx_push_constants(cmd_buffer, cmd_buffer_flush_gfx_push_constants(
push_stages & VK_SHADER_STAGE_ALL_GRAPHICS); cmd_buffer,
push_constants_dirty & VK_SHADER_STAGE_ALL_GRAPHICS);
#if GFX_VERx10 >= 125 #if GFX_VERx10 >= 125
cmd_buffer_flush_mesh_inline_data( cmd_buffer_flush_mesh_inline_data(
cmd_buffer, push_stages & (VK_SHADER_STAGE_TASK_BIT_EXT | cmd_buffer, push_constants_dirty & (VK_SHADER_STAGE_TASK_BIT_EXT |
VK_SHADER_STAGE_MESH_BIT_EXT)); VK_SHADER_STAGE_MESH_BIT_EXT));
#endif #endif
} }
if (dirty & VK_SHADER_STAGE_ALL_GRAPHICS) { if (descriptors_pointers_dirty)
cmd_buffer_emit_descriptor_pointers(cmd_buffer, cmd_buffer_emit_descriptor_pointers(cmd_buffer, descriptors_pointers_dirty);
dirty & VK_SHADER_STAGE_ALL_GRAPHICS);
}
#if GFX_VER >= 20 #if GFX_VER >= 20
if (cmd_buffer->state.gfx.dirty & ANV_CMD_DIRTY_INDIRECT_DATA_STRIDE) { if (cmd_buffer->state.gfx.dirty & ANV_CMD_DIRTY_INDIRECT_DATA_STRIDE) {
@ -1002,6 +1006,8 @@ cmd_buffer_flush_gfx_state(struct anv_cmd_buffer *cmd_buffer)
} }
#endif #endif
cmd_buffer->state.descriptors_dirty &= ~descriptors_dirty;
cmd_buffer->state.descriptors_pointers_dirty &= ~descriptors_pointers_dirty;
cmd_buffer->state.gfx.dirty = 0; cmd_buffer->state.gfx.dirty = 0;
} }

View file

@ -383,7 +383,7 @@ genX(emit_simpler_shader_init_fragment)(struct anv_simple_shader *state)
ANV_CMD_DIRTY_OCCLUSION_QUERY_ACTIVE | ANV_CMD_DIRTY_OCCLUSION_QUERY_ACTIVE |
ANV_CMD_DIRTY_INDEX_TYPE); ANV_CMD_DIRTY_INDEX_TYPE);
state->cmd_buffer->state.push_constants_dirty |= VK_SHADER_STAGE_FRAGMENT_BIT; state->cmd_buffer->state.push_constants_dirty |= VK_SHADER_STAGE_FRAGMENT_BIT;
state->cmd_buffer->state.gfx.push_constant_stages = VK_SHADER_STAGE_FRAGMENT_BIT; state->cmd_buffer->state.gfx.push_constant_stages |= VK_SHADER_STAGE_FRAGMENT_BIT;
} }
static void static void