From d6751f2a3b72fe625a0fcfd4c0efccb9ecc50278 Mon Sep 17 00:00:00 2001 From: Lionel Landwerlin Date: Mon, 18 May 2026 13:40:57 +0300 Subject: [PATCH] anv: further optimize dirty state after secondary emission Signed-off-by: Lionel Landwerlin Reviewed-by: Ivan Briano Part-of: --- src/intel/vulkan/anv_private.h | 4 ++++ src/intel/vulkan/genX_blorp_exec.c | 3 +++ src/intel/vulkan/genX_cmd_buffer.c | 18 ++++++++++++++---- src/intel/vulkan/genX_gfx_state.c | 3 +++ src/intel/vulkan/genX_gpu_memcpy.c | 3 +++ src/intel/vulkan/genX_simple_shader.c | 3 +++ 6 files changed, 30 insertions(+), 4 deletions(-) diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h index 4d22d9efb16..c39695398a3 100644 --- a/src/intel/vulkan/anv_private.h +++ b/src/intel/vulkan/anv_private.h @@ -1984,6 +1984,7 @@ enum anv_gfx_state_bits { ANV_GFX_STATE_WM, ANV_GFX_STATE_WM_DEPTH_STENCIL, ANV_GFX_STATE_PS_EXTRA, + ANV_GFX_STATE_PMA_FIX, /* Fake state to implement workaround */ ANV_GFX_STATE_WA_18019816803, /* Fake state to implement workaround */ ANV_GFX_STATE_WA_14018283232, /* Fake state to implement workaround */ @@ -2492,6 +2493,9 @@ struct anv_gfx_dynamic_state { /** Dirty bits of what needs to be reemitted */ BITSET_DECLARE(emit_dirty, ANV_GFX_STATE_MAX); + + /** Emitted bits */ + BITSET_DECLARE(emitted, ANV_GFX_STATE_MAX); }; enum anv_internal_kernel_name { diff --git a/src/intel/vulkan/genX_blorp_exec.c b/src/intel/vulkan/genX_blorp_exec.c index b0f59e6edad..79a99d46545 100644 --- a/src/intel/vulkan/genX_blorp_exec.c +++ b/src/intel/vulkan/genX_blorp_exec.c @@ -432,6 +432,9 @@ blorp_exec_on_render(struct blorp_batch *batch, BITSET_SET(hw_state->emit_dirty, ANV_GFX_STATE_PS_BLEND); } + /* Add the flagged instructions as emitted */ + BITSET_OR(hw_state->emitted, hw_state->emitted, hw_state->emit_dirty); + anv_cmd_dirty_mask_t dirty = ~(ANV_CMD_DIRTY_INDEX_BUFFER | ANV_CMD_DIRTY_XFB_ENABLE | ANV_CMD_DIRTY_OCCLUSION_QUERY_ACTIVE | diff --git 
a/src/intel/vulkan/genX_cmd_buffer.c b/src/intel/vulkan/genX_cmd_buffer.c index 8b959ea2a87..43b67c2e97d 100644 --- a/src/intel/vulkan/genX_cmd_buffer.c +++ b/src/intel/vulkan/genX_cmd_buffer.c @@ -4415,6 +4415,20 @@ genX(CmdExecuteCommands)( container->state.compute.trace_rays_active |= secondary->state.compute.trace_rays_active; + + /* For each GFX instruction emitted in the secondary, mark it dirty in + * the container, so it's reemitted. Even though the Vulkan spec says that + * after a secondary command buffer is executed the state in the primary + * is undefined, our emission optimization code will avoid dirtying an + * instruction if the values inside an instruction haven't changed, but + * it doesn't see that this was potentially changed by the secondary. + * + * TODO: do an ultimate version of this by diffing secondary/container + * emitted instructions + */ + BITSET_OR(container->state.gfx.dyn_state.emit_dirty, + container->state.gfx.dyn_state.emit_dirty, + secondary->state.gfx.dyn_state.emitted); } /* The secondary isn't counted in our VF cache tracking so we need to @@ -4446,10 +4460,6 @@ genX(CmdExecuteCommands)( memset(&container->state.gfx.urb_cfg, 0, sizeof(struct intel_urb_config)); - /* Reemit all GFX instructions in container */ - BITSET_OR(container->state.gfx.dyn_state.emit_dirty, - container->state.gfx.dyn_state.emit_dirty, - device->gfx_dirty_state); if (container->device->vk.enabled_extensions.KHR_fragment_shading_rate) { /* Also recompute the CPS_STATE offset */ struct vk_dynamic_graphics_state *dyn = diff --git a/src/intel/vulkan/genX_gfx_state.c b/src/intel/vulkan/genX_gfx_state.c index bad0f962e7c..ba6b3c505ec 100644 --- a/src/intel/vulkan/genX_gfx_state.c +++ b/src/intel/vulkan/genX_gfx_state.c @@ -3646,6 +3646,9 @@ cmd_buffer_gfx_state_emission(struct anv_cmd_buffer *cmd_buffer) mi_builder_set_mocs(&b, isl_mocs(&device->isl_dev, 0, false)); } + /* Save all the instructions we're about to emit */ + BITSET_OR(hw_state->emitted, 
hw_state->emitted, hw_state->emit_dirty); + #if INTEL_WA_16011107343_GFX_VER /* Will be emitted in front of every draw instead */ if (intel_needs_workaround(device->info, 16011107343) && diff --git a/src/intel/vulkan/genX_gpu_memcpy.c b/src/intel/vulkan/genX_gpu_memcpy.c index 0eed0a1ad42..bdac04819a0 100644 --- a/src/intel/vulkan/genX_gpu_memcpy.c +++ b/src/intel/vulkan/genX_gpu_memcpy.c @@ -373,6 +373,9 @@ genX(emit_so_memcpy_fini)(struct anv_memcpy_state *state, BITSET_SET(hw_state->emit_dirty, ANV_GFX_STATE_TASK_CONTROL); } + /* Add the flagged instructions as emitted */ + BITSET_OR(hw_state->emitted, hw_state->emitted, hw_state->emit_dirty); + state->cmd_buffer->state.gfx.dirty |= ~(ANV_CMD_DIRTY_ALL_SHADERS(state->device) | ANV_CMD_DIRTY_INDEX_BUFFER | diff --git a/src/intel/vulkan/genX_simple_shader.c b/src/intel/vulkan/genX_simple_shader.c index 142631bee6a..fcbd46cc8d1 100644 --- a/src/intel/vulkan/genX_simple_shader.c +++ b/src/intel/vulkan/genX_simple_shader.c @@ -358,6 +358,9 @@ genX(emit_simpler_shader_init_fragment)(struct anv_simple_shader *state) BITSET_SET(hw_state->emit_dirty, ANV_GFX_STATE_TASK_CONTROL); } + /* Add the flagged instructions as emitted */ + BITSET_OR(hw_state->emitted, hw_state->emitted, hw_state->emit_dirty); + /* Update urb config after simple shader. */ memcpy(&state->cmd_buffer->state.gfx.urb_cfg, &urb_cfg, sizeof(urb_cfg));