From ca0a509b6b02551cc68d8c7f9675b6f6b1b26bb3 Mon Sep 17 00:00:00 2001 From: Lionel Landwerlin Date: Wed, 2 Apr 2025 11:31:17 +0300 Subject: [PATCH] anv: move 3DSTATE_VFG emission to dynamic path MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A bunch of fields are fixed and others depend on what stages are bound. Signed-off-by: Lionel Landwerlin Reviewed-by: Tapani Pälli Part-of: --- src/intel/vulkan/anv_cmd_buffer.c | 1 - src/intel/vulkan/anv_private.h | 3 +- src/intel/vulkan/genX_gfx_state.c | 61 ++++++++++++++++++++++++++++++- src/intel/vulkan/genX_pipeline.c | 36 ------------------ 4 files changed, 61 insertions(+), 40 deletions(-) diff --git a/src/intel/vulkan/anv_cmd_buffer.c b/src/intel/vulkan/anv_cmd_buffer.c index d7a222e9235..ff139800c98 100644 --- a/src/intel/vulkan/anv_cmd_buffer.c +++ b/src/intel/vulkan/anv_cmd_buffer.c @@ -566,7 +566,6 @@ anv_cmd_buffer_flush_pipeline_hw_state(struct anv_cmd_buffer *cmd_buffer, diff_fix_state(STREAMOUT, partial.so); diff_fix_state(GS, partial.gs); diff_fix_state(TE, partial.te); - diff_fix_state(VFG, partial.vfg); diff_fix_state(PS, partial.ps); diff_fix_state(PS_EXTRA, partial.ps_extra); diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h index 58013369892..b9c07f7f922 100644 --- a/src/intel/vulkan/anv_private.h +++ b/src/intel/vulkan/anv_private.h @@ -1862,7 +1862,9 @@ struct anv_gfx_dynamic_state { /* 3DSTATE_VFG */ struct { + uint32_t DistributionGranularity; uint32_t DistributionMode; + bool GranularityThresholdDisable; bool ListCutIndexEnable; } vfg; @@ -5214,7 +5216,6 @@ struct anv_graphics_pipeline { struct anv_gfx_state_ptr te; struct anv_gfx_state_ptr ps; struct anv_gfx_state_ptr ps_protected; - struct anv_gfx_state_ptr vfg; } partial; }; diff --git a/src/intel/vulkan/genX_gfx_state.c b/src/intel/vulkan/genX_gfx_state.c index 5aa943245e5..d84d06ce585 100644 --- a/src/intel/vulkan/genX_gfx_state.c +++ b/src/intel/vulkan/genX_gfx_state.c @@ 
-1133,6 +1133,43 @@ update_ps_extra_kills_pixel(struct anv_gfx_dynamic_state *hw_state, } #if GFX_VERx10 >= 125 +ALWAYS_INLINE static bool +geom_or_tess_prim_id_used(const struct anv_cmd_graphics_state *gfx) +{ + const struct brw_tcs_prog_data *tcs_prog_data = + get_gfx_tcs_prog_data(gfx); + const struct brw_tes_prog_data *tes_prog_data = + get_gfx_tes_prog_data(gfx); + const struct brw_gs_prog_data *gs_prog_data = + get_gfx_gs_prog_data(gfx); + + return (tcs_prog_data && tcs_prog_data->include_primitive_id) || + (tes_prog_data && tes_prog_data->include_primitive_id) || + (gs_prog_data && gs_prog_data->include_primitive_id); +} + +ALWAYS_INLINE static void +update_vfg_distribution_mode(struct anv_gfx_dynamic_state *hw_state, + const struct anv_device *device, + const struct anv_cmd_graphics_state *gfx) +{ + const bool needs_instance_granularity = + intel_needs_workaround(device->info, 14019166699) && + (sbe_primitive_id_override(gfx) || geom_or_tess_prim_id_used(gfx)); + + + SET(VFG, vfg.DistributionMode, (GFX_VER < 20 && + !anv_gfx_has_stage(gfx, MESA_SHADER_TESS_EVAL)) ? + RR_FREE : RR_STRICT); + SET(VFG, vfg.DistributionGranularity, needs_instance_granularity ? 
+ InstanceLevelGranularity : + BatchLevelGranularity); +#if INTEL_WA_14014851047_GFX_VER + SET(VFG, vfg.GranularityThresholdDisable, intel_needs_workaround(device->info, + 14014851047)); +#endif +} + ALWAYS_INLINE static void update_vfg_list_cut_index(struct anv_gfx_dynamic_state *hw_state, const struct vk_dynamic_graphics_state *dyn) @@ -2308,6 +2345,9 @@ cmd_buffer_flush_gfx_runtime_state(struct anv_gfx_dynamic_state *hw_state, BITSET_SET(hw_state->dirty, ANV_GFX_STATE_INDEX_BUFFER); #if GFX_VERx10 >= 125 + if (gfx->dirty & ANV_CMD_DIRTY_PRERASTER_SHADERS) + update_vfg_distribution_mode(hw_state, device, gfx); + if (BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_IA_PRIMITIVE_RESTART_ENABLE)) update_vfg_list_cut_index(hw_state, dyn); #endif @@ -3271,8 +3311,25 @@ cmd_buffer_gfx_state_emission(struct anv_cmd_buffer *cmd_buffer) #if GFX_VERx10 >= 125 if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_VFG)) { - anv_batch_emit_merge(&cmd_buffer->batch, GENX(3DSTATE_VFG), - pipeline, partial.vfg, vfg) { + anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_VFG), vfg) { + /* 192 vertices for TRILIST_ADJ */ + vfg.ListNBatchSizeScale = 0; + /* Batch size of 384 vertices */ + vfg.List3BatchSizeScale = 2; + /* Batch size of 128 vertices */ + vfg.List2BatchSizeScale = 1; + /* Batch size of 128 vertices */ + vfg.List1BatchSizeScale = 2; + /* Batch size of 256 vertices for STRIP topologies */ + vfg.StripBatchSizeScale = 3; + /* 192 control points for PATCHLIST_3 */ + vfg.PatchBatchSizeScale = 1; + /* 192 control points for PATCHLIST_3 */ + vfg.PatchBatchSizeMultiplier = 31; + + SET(vfg, vfg, DistributionGranularity); + SET(vfg, vfg, DistributionMode); + SET(vfg, vfg, GranularityThresholdDisable); SET(vfg, vfg, ListCutIndexEnable); } } diff --git a/src/intel/vulkan/genX_pipeline.c b/src/intel/vulkan/genX_pipeline.c index a31d89a1ddf..0cd3822783e 100644 --- a/src/intel/vulkan/genX_pipeline.c +++ b/src/intel/vulkan/genX_pipeline.c @@ -1475,42 +1475,6 @@ genX(graphics_pipeline_emit)(struct 
anv_graphics_pipeline *pipeline, { compute_kill_pixel(pipeline, state->ms, state); -#if GFX_VERx10 >= 125 - bool needs_instance_granularity = - intel_needs_workaround(pipeline->base.base.device->info, 14019166699) && - (sbe_primitive_id_override(pipeline) || - geom_or_tess_prim_id_used(pipeline)); - - anv_pipeline_emit(pipeline, partial.vfg, GENX(3DSTATE_VFG), vfg) { - /* Gfx12.5: If 3DSTATE_TE: TE Enable == 1 then RR_STRICT else RR_FREE */ - vfg.DistributionMode = -#if GFX_VER < 20 - !anv_pipeline_has_stage(pipeline, MESA_SHADER_TESS_EVAL) ? RR_FREE : -#endif - RR_STRICT; - vfg.DistributionGranularity = needs_instance_granularity ? - InstanceLevelGranularity : BatchLevelGranularity; -#if INTEL_WA_14014851047_GFX_VER - vfg.GranularityThresholdDisable = - intel_needs_workaround(pipeline->base.base.device->info, 14014851047); -#endif - /* 192 vertices for TRILIST_ADJ */ - vfg.ListNBatchSizeScale = 0; - /* Batch size of 384 vertices */ - vfg.List3BatchSizeScale = 2; - /* Batch size of 128 vertices */ - vfg.List2BatchSizeScale = 1; - /* Batch size of 128 vertices */ - vfg.List1BatchSizeScale = 2; - /* Batch size of 256 vertices for STRIP topologies */ - vfg.StripBatchSizeScale = 3; - /* 192 control points for PATCHLIST_3 */ - vfg.PatchBatchSizeScale = 1; - /* 192 control points for PATCHLIST_3 */ - vfg.PatchBatchSizeMultiplier = 31; - } -#endif - if (anv_pipeline_is_primitive(pipeline)) { emit_vertex_input(pipeline, state, state->vi);