From 0ef0b358da24a5a174e960d12765dd5625d8d685 Mon Sep 17 00:00:00 2001 From: Lionel Landwerlin Date: Mon, 30 Mar 2026 15:13:23 +0300 Subject: [PATCH] anv: expose non binding-table/push-pointer flushing It'll be useful for DGC preprocessing where we need to extract some state programming from the command buffer, yet pointer programming is not required to be flushed. Signed-off-by: Lionel Landwerlin Acked-by: Alyssa Rosenzweig Part-of: --- src/intel/vulkan/anv_genX.h | 4 +- src/intel/vulkan/genX_cmd_draw.c | 128 +++++++++++------- .../vulkan/genX_cmd_draw_generated_indirect.h | 4 +- 3 files changed, 83 insertions(+), 53 deletions(-) diff --git a/src/intel/vulkan/anv_genX.h b/src/intel/vulkan/anv_genX.h index 72e4000a9c5..874f97934a9 100644 --- a/src/intel/vulkan/anv_genX.h +++ b/src/intel/vulkan/anv_genX.h @@ -228,10 +228,10 @@ void genX(cmd_buffer_flush_gfx_hw_state)(struct anv_cmd_buffer *cmd_buffer); void genX(cmd_buffer_flush_gfx_runtime_state)(struct anv_cmd_buffer *cmd_buffer); -void genX(cmd_buffer_flush_gfx_hw_state)(struct anv_cmd_buffer *cmd_buffer); - void genX(cmd_buffer_flush_gfx_state)(struct anv_cmd_buffer *cmd_buffer); +void genX(cmd_buffer_flush_gfx)(struct anv_cmd_buffer *cmd_buffer); + void genX(cmd_buffer_flush_compute_state)(struct anv_cmd_buffer *cmd_buffer); void genX(cmd_buffer_enable_pma_fix)(struct anv_cmd_buffer *cmd_buffer, diff --git a/src/intel/vulkan/genX_cmd_draw.c b/src/intel/vulkan/genX_cmd_draw.c index 7d6377df206..6e48ec822de 100644 --- a/src/intel/vulkan/genX_cmd_draw.c +++ b/src/intel/vulkan/genX_cmd_draw.c @@ -791,7 +791,8 @@ cmd_buffer_flush_vertex_buffers(struct anv_cmd_buffer *cmd_buffer, } } -ALWAYS_INLINE static void +/** Flush 3D state programming except binding tables & push constants */ +static inline void cmd_buffer_flush_gfx_state(struct anv_cmd_buffer *cmd_buffer) { struct anv_device *device = cmd_buffer->device; @@ -860,29 +861,7 @@ cmd_buffer_flush_gfx_state(struct anv_cmd_buffer *cmd_buffer) const bool any_dynamic_state_dirty = vk_dynamic_graphics_state_any_dirty(dyn); - cmd_buffer->state.descriptors_dirty |= - genX(cmd_buffer_flush_push_descriptors)(cmd_buffer, - &cmd_buffer->state.gfx.base); - - uint32_t descriptors_dirty = cmd_buffer->state.descriptors_dirty & - gfx->active_stages; - cmd_buffer->state.descriptors_pointers_dirty |= - descriptors_dirty & VK_SHADER_STAGE_ALL_GRAPHICS; - uint32_t descriptors_pointers_dirty = - cmd_buffer->state.descriptors_pointers_dirty & gfx->active_stages; - - /* Because we're pushing UBOs, we have to push whenever either descriptors - * or push constants is dirty. - */ - uint32_t push_constants_dirty = - (cmd_buffer->state.push_constants_dirty | - cmd_buffer->state.descriptors_dirty) & gfx->active_stages; - - if (!cmd_buffer->state.gfx.dirty && - !descriptors_dirty && - !descriptors_pointers_dirty && - !any_dynamic_state_dirty && - !push_constants_dirty) + if (!cmd_buffer->state.gfx.dirty && !any_dynamic_state_dirty) return; if (cmd_buffer->state.gfx.dirty & ANV_CMD_DIRTY_XFB_ENABLE) { @@ -981,9 +960,51 @@ cmd_buffer_flush_gfx_state(struct anv_cmd_buffer *cmd_buffer) } #endif +#if GFX_VER >= 20 + if (cmd_buffer->state.gfx.dirty & ANV_CMD_DIRTY_INDIRECT_DATA_STRIDE) { + anv_batch_emit(&cmd_buffer->batch, GENX(STATE_BYTE_STRIDE), sb_stride) { + sb_stride.ByteStride = cmd_buffer->state.gfx.indirect_data_stride >> 2; + sb_stride.ByteStrideEnable = + cmd_buffer->state.gfx.indirect_data_stride_aligned == U_TRISTATE_NO; + } + } +#endif + /* Render targets live in the same binding table as fragment descriptors */ if (cmd_buffer->state.gfx.dirty & ANV_CMD_DIRTY_RENDER_TARGETS) - descriptors_dirty |= VK_SHADER_STAGE_FRAGMENT_BIT; + cmd_buffer->state.descriptors_dirty |= VK_SHADER_STAGE_FRAGMENT_BIT; + + cmd_buffer->state.gfx.dirty = 0; +} + +/** Flush binding tables & push constants programming */ +static inline void +cmd_buffer_flush_gfx_pointers(struct anv_cmd_buffer *cmd_buffer) +{ + struct anv_cmd_graphics_state *gfx = &cmd_buffer->state.gfx; + + cmd_buffer->state.descriptors_dirty |= + genX(cmd_buffer_flush_push_descriptors)(cmd_buffer, + &cmd_buffer->state.gfx.base); + + uint32_t descriptors_dirty = + cmd_buffer->state.descriptors_dirty & gfx->active_stages; + cmd_buffer->state.descriptors_pointers_dirty |= + descriptors_dirty & VK_SHADER_STAGE_ALL_GRAPHICS; + uint32_t descriptors_pointers_dirty = + cmd_buffer->state.descriptors_pointers_dirty & gfx->active_stages; + + /* Because we're pushing UBOs, we have to push whenever either descriptors + * or push constants is dirty. + */ + uint32_t push_constants_dirty = + (cmd_buffer->state.push_constants_dirty | + cmd_buffer->state.descriptors_dirty) & gfx->active_stages; + + if (descriptors_dirty == 0 && + descriptors_pointers_dirty == 0 && + push_constants_dirty == 0) + return; /* We emit the binding tables and sampler tables first, then emit push * constants and then finally emit binding table and sampler table @@ -993,7 +1014,7 @@ cmd_buffer_flush_gfx_state(struct anv_cmd_buffer *cmd_buffer) * 3DSTATE_BINDING_TABLE_POINTER_* for the push constants to take effect. */ if (descriptors_dirty) { - descriptors_pointers_dirty |= + cmd_buffer->state.descriptors_pointers_dirty |= genX(cmd_buffer_flush_descriptor_sets)( cmd_buffer, &cmd_buffer->state.gfx.base, @@ -1002,6 +1023,10 @@ cmd_buffer_flush_gfx_state(struct anv_cmd_buffer *cmd_buffer) ARRAY_SIZE(gfx->shaders)) & VK_SHADER_STAGE_ALL_GRAPHICS; } + descriptors_pointers_dirty = + cmd_buffer->state.descriptors_pointers_dirty & gfx->active_stages; + + push_constants_dirty = (cmd_buffer->state.push_constants_dirty | cmd_buffer->state.descriptors_dirty) & gfx->active_stages; if (push_constants_dirty) { @@ -1024,19 +1049,9 @@ cmd_buffer_flush_gfx_state(struct anv_cmd_buffer *cmd_buffer) if (descriptors_pointers_dirty) cmd_buffer_emit_descriptor_pointers(cmd_buffer, descriptors_pointers_dirty); -#if GFX_VER >= 20 - if (cmd_buffer->state.gfx.dirty & ANV_CMD_DIRTY_INDIRECT_DATA_STRIDE) { - anv_batch_emit(&cmd_buffer->batch, GENX(STATE_BYTE_STRIDE), sb_stride) { - sb_stride.ByteStride = cmd_buffer->state.gfx.indirect_data_stride >> 2; - sb_stride.ByteStrideEnable = - cmd_buffer->state.gfx.indirect_data_stride_aligned == U_TRISTATE_NO; - } - } -#endif cmd_buffer->state.descriptors_dirty &= ~descriptors_dirty; cmd_buffer->state.descriptors_pointers_dirty &= ~descriptors_pointers_dirty; - cmd_buffer->state.gfx.dirty = 0; } void @@ -1045,6 +1060,21 @@ genX(cmd_buffer_flush_gfx_state)(struct anv_cmd_buffer *cmd_buffer) cmd_buffer_flush_gfx_state(cmd_buffer); } +/** Flush everything needed prior to drawcall emission */ +static inline void +cmd_buffer_flush_gfx(struct anv_cmd_buffer *cmd_buffer) +{ + cmd_buffer_flush_gfx_state(cmd_buffer); + cmd_buffer_flush_gfx_pointers(cmd_buffer); +} + +void +genX(cmd_buffer_flush_gfx)(struct anv_cmd_buffer *cmd_buffer) +{ + cmd_buffer_flush_gfx_state(cmd_buffer); + cmd_buffer_flush_gfx_pointers(cmd_buffer); +} + ALWAYS_INLINE static bool anv_use_generated_draws(const struct anv_cmd_buffer *cmd_buffer, uint32_t count) { @@ -1226,7 +1256,7 @@ void genX(CmdDraw)( /* Select pipeline here to allow * cmd_buffer_emit_vertex_constants_and_flush() without flushing before - * cmd_buffer_flush_gfx_state(). + * cmd_buffer_flush_gfx(). */ genX(flush_pipeline_select_3d)(cmd_buffer); @@ -1237,7 +1267,7 @@ void genX(CmdDraw)( false /* force_flush */); #endif - cmd_buffer_flush_gfx_state(cmd_buffer); + cmd_buffer_flush_gfx(cmd_buffer); if (cmd_buffer->state.conditional_render_enabled) genX(cmd_emit_conditional_render_predicate)(cmd_buffer); @@ -1285,7 +1315,7 @@ void genX(CmdDrawMultiEXT)( if (anv_batch_has_error(&cmd_buffer->batch)) return; - cmd_buffer_flush_gfx_state(cmd_buffer); + cmd_buffer_flush_gfx(cmd_buffer); if (cmd_buffer->state.conditional_render_enabled) genX(cmd_emit_conditional_render_predicate)(cmd_buffer); @@ -1387,7 +1417,7 @@ void genX(CmdDrawIndexed)( /* Select pipeline here to allow * cmd_buffer_emit_vertex_constants_and_flush() without flushing before - * cmd_buffer_flush_gfx_state(). + * cmd_buffer_flush_gfx(). */ genX(flush_pipeline_select_3d)(cmd_buffer); @@ -1398,7 +1428,7 @@ void genX(CmdDrawIndexed)( 0, false /* force_flush */); #endif - cmd_buffer_flush_gfx_state(cmd_buffer); + cmd_buffer_flush_gfx(cmd_buffer); if (cmd_buffer->state.conditional_render_enabled) genX(cmd_emit_conditional_render_predicate)(cmd_buffer); @@ -1446,7 +1476,7 @@ void genX(CmdDrawMultiIndexedEXT)( if (anv_batch_has_error(&cmd_buffer->batch)) return; - cmd_buffer_flush_gfx_state(cmd_buffer); + cmd_buffer_flush_gfx(cmd_buffer); if (cmd_buffer->state.conditional_render_enabled) genX(cmd_emit_conditional_render_predicate)(cmd_buffer); @@ -1683,7 +1713,7 @@ void genX(CmdDrawIndirectByteCountEXT)( emit_draw_index(cmd_buffer, 0); #endif - cmd_buffer_flush_gfx_state(cmd_buffer); + cmd_buffer_flush_gfx(cmd_buffer); if (cmd_buffer->state.conditional_render_enabled) genX(cmd_emit_conditional_render_predicate)(cmd_buffer); @@ -1836,7 +1866,7 @@ emit_indirect_draws(struct anv_cmd_buffer *cmd_buffer, const struct anv_cmd_graphics_state *gfx = &cmd_buffer->state.gfx; const struct brw_vs_prog_data *vs_prog_data = get_gfx_vs_prog_data(gfx); #endif - cmd_buffer_flush_gfx_state(cmd_buffer); + cmd_buffer_flush_gfx(cmd_buffer); if (cmd_buffer->state.conditional_render_enabled) genX(cmd_emit_conditional_render_predicate)(cmd_buffer); @@ -1976,7 +2006,7 @@ genX(cmd_buffer_emit_execute_indirect_draws)(struct anv_cmd_buffer *cmd_buffer, bool aligned_stride = cmd_buffer_set_indirect_stride(cmd_buffer, indirect_data_stride, cmd); - genX(cmd_buffer_flush_gfx_state)(cmd_buffer); + cmd_buffer_flush_gfx(cmd_buffer); if (cmd_buffer->state.conditional_render_enabled) genX(cmd_emit_conditional_render_predicate)(cmd_buffer); @@ -2215,7 +2245,7 @@ emit_indirect_count_draws(struct anv_cmd_buffer *cmd_buffer, const struct brw_vs_prog_data *vs_prog_data = get_gfx_vs_prog_data(gfx); #endif - cmd_buffer_flush_gfx_state(cmd_buffer); + cmd_buffer_flush_gfx(cmd_buffer); struct mi_builder b; mi_builder_init(&b, cmd_buffer->device->info, &cmd_buffer->batch); @@ -2518,7 +2548,7 @@ genX(CmdDrawMeshTasksEXT)( trace_intel_begin_draw_mesh(&cmd_buffer->trace); /* TODO(mesh): Check if this is not emitting more packets than we need. */ - cmd_buffer_flush_gfx_state(cmd_buffer); + cmd_buffer_flush_gfx(cmd_buffer); if (cmd_buffer->state.conditional_render_enabled) genX(cmd_emit_conditional_render_predicate)(cmd_buffer); @@ -2611,7 +2641,7 @@ genX(CmdDrawMeshTasksIndirectEXT)( return; } - cmd_buffer_flush_gfx_state(cmd_buffer); + cmd_buffer_flush_gfx(cmd_buffer); if (cmd_state->conditional_render_enabled) genX(cmd_emit_conditional_render_predicate)(cmd_buffer); @@ -2678,7 +2708,7 @@ genX(CmdDrawMeshTasksIndirectCountEXT)( return; } - cmd_buffer_flush_gfx_state(cmd_buffer); + cmd_buffer_flush_gfx(cmd_buffer); bool uses_drawid = (task_prog_data && task_prog_data->uses_drawid) || mesh_prog_data->uses_drawid; diff --git a/src/intel/vulkan/genX_cmd_draw_generated_indirect.h b/src/intel/vulkan/genX_cmd_draw_generated_indirect.h index cff60d15f88..20e8214d532 100644 --- a/src/intel/vulkan/genX_cmd_draw_generated_indirect.h +++ b/src/intel/vulkan/genX_cmd_draw_generated_indirect.h @@ -324,7 +324,7 @@ genX(cmd_buffer_emit_indirect_generated_draws_inplace)(struct anv_cmd_buffer *cm genX(cmd_buffer_emit_indirect_generated_draws_init)(cmd_buffer); /* Emit the 3D state in the main batch. */ - genX(cmd_buffer_flush_gfx_state)(cmd_buffer); + genX(cmd_buffer_flush_gfx)(cmd_buffer); if (cmd_buffer->state.conditional_render_enabled) genX(cmd_emit_conditional_render_predicate)(cmd_buffer); @@ -562,7 +562,7 @@ genX(cmd_buffer_emit_indirect_generated_draws_inring)(struct anv_cmd_buffer *cmd trace_intel_end_generate_draws(&cmd_buffer->trace); /* Emit the 3D state in the main batch. */ - genX(cmd_buffer_flush_gfx_state)(cmd_buffer); + genX(cmd_buffer_flush_gfx)(cmd_buffer); if (cmd_buffer->state.conditional_render_enabled) genX(cmd_emit_conditional_render_predicate)(cmd_buffer);