anv: move 3DSTATE_VFG emission to dynamic path

A bunch of fields are fixed and others depend on which stages are
bound.

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Reviewed-by: Tapani Pälli <tapani.palli@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/36665>
This commit is contained in:
Lionel Landwerlin 2025-04-02 11:31:17 +03:00 committed by Marge Bot
parent 5a8e295a97
commit ca0a509b6b
4 changed files with 61 additions and 40 deletions

View file

@ -566,7 +566,6 @@ anv_cmd_buffer_flush_pipeline_hw_state(struct anv_cmd_buffer *cmd_buffer,
diff_fix_state(STREAMOUT, partial.so);
diff_fix_state(GS, partial.gs);
diff_fix_state(TE, partial.te);
diff_fix_state(VFG, partial.vfg);
diff_fix_state(PS, partial.ps);
diff_fix_state(PS_EXTRA, partial.ps_extra);

View file

@ -1862,7 +1862,9 @@ struct anv_gfx_dynamic_state {
/* 3DSTATE_VFG */
struct {
uint32_t DistributionGranularity;
uint32_t DistributionMode;
bool GranularityThresholdDisable;
bool ListCutIndexEnable;
} vfg;
@ -5214,7 +5216,6 @@ struct anv_graphics_pipeline {
struct anv_gfx_state_ptr te;
struct anv_gfx_state_ptr ps;
struct anv_gfx_state_ptr ps_protected;
struct anv_gfx_state_ptr vfg;
} partial;
};

View file

@ -1133,6 +1133,43 @@ update_ps_extra_kills_pixel(struct anv_gfx_dynamic_state *hw_state,
}
#if GFX_VERx10 >= 125
/* Reports whether any of the tessellation control, tessellation evaluation
 * or geometry stages has include_primitive_id set in its prog_data.
 *
 * A stage whose prog_data lookup returns NULL does not contribute.
 */
ALWAYS_INLINE static bool
geom_or_tess_prim_id_used(const struct anv_cmd_graphics_state *gfx)
{
   const struct brw_tcs_prog_data *tcs = get_gfx_tcs_prog_data(gfx);
   const struct brw_tes_prog_data *tes = get_gfx_tes_prog_data(gfx);
   const struct brw_gs_prog_data *gs = get_gfx_gs_prog_data(gfx);

   if (tcs && tcs->include_primitive_id)
      return true;

   if (tes && tes->include_primitive_id)
      return true;

   return gs && gs->include_primitive_id;
}
ALWAYS_INLINE static void
update_vfg_distribution_mode(struct anv_gfx_dynamic_state *hw_state,
const struct anv_device *device,
const struct anv_cmd_graphics_state *gfx)
{
const bool needs_instance_granularity =
intel_needs_workaround(device->info, 14019166699) &&
(sbe_primitive_id_override(gfx) || geom_or_tess_prim_id_used(gfx));
SET(VFG, vfg.DistributionMode, (GFX_VER < 20 &&
!anv_gfx_has_stage(gfx, MESA_SHADER_TESS_EVAL)) ?
RR_FREE : RR_STRICT);
SET(VFG, vfg.DistributionGranularity, needs_instance_granularity ?
InstanceLevelGranularity :
BatchLevelGranularity);
#if INTEL_WA_14014851047_GFX_VER
SET(VFG, vfg.GranularityThresholdDisable, intel_needs_workaround(device->info,
14014851047));
#endif
}
ALWAYS_INLINE static void
update_vfg_list_cut_index(struct anv_gfx_dynamic_state *hw_state,
const struct vk_dynamic_graphics_state *dyn)
@ -2308,6 +2345,9 @@ cmd_buffer_flush_gfx_runtime_state(struct anv_gfx_dynamic_state *hw_state,
BITSET_SET(hw_state->dirty, ANV_GFX_STATE_INDEX_BUFFER);
#if GFX_VERx10 >= 125
if (gfx->dirty & ANV_CMD_DIRTY_PRERASTER_SHADERS)
update_vfg_distribution_mode(hw_state, device, gfx);
if (BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_IA_PRIMITIVE_RESTART_ENABLE))
update_vfg_list_cut_index(hw_state, dyn);
#endif
@ -3271,8 +3311,25 @@ cmd_buffer_gfx_state_emission(struct anv_cmd_buffer *cmd_buffer)
#if GFX_VERx10 >= 125
if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_VFG)) {
anv_batch_emit_merge(&cmd_buffer->batch, GENX(3DSTATE_VFG),
pipeline, partial.vfg, vfg) {
anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_VFG), vfg) {
/* 192 vertices for TRILIST_ADJ */
vfg.ListNBatchSizeScale = 0;
/* Batch size of 384 vertices */
vfg.List3BatchSizeScale = 2;
/* Batch size of 128 vertices */
vfg.List2BatchSizeScale = 1;
/* Batch size of 128 vertices */
vfg.List1BatchSizeScale = 2;
/* Batch size of 256 vertices for STRIP topologies */
vfg.StripBatchSizeScale = 3;
/* 192 control points for PATCHLIST_3 */
vfg.PatchBatchSizeScale = 1;
/* 192 control points for PATCHLIST_3 */
vfg.PatchBatchSizeMultiplier = 31;
SET(vfg, vfg, DistributionGranularity);
SET(vfg, vfg, DistributionMode);
SET(vfg, vfg, GranularityThresholdDisable);
SET(vfg, vfg, ListCutIndexEnable);
}
}

View file

@ -1475,42 +1475,6 @@ genX(graphics_pipeline_emit)(struct anv_graphics_pipeline *pipeline,
{
compute_kill_pixel(pipeline, state->ms, state);
#if GFX_VERx10 >= 125
bool needs_instance_granularity =
intel_needs_workaround(pipeline->base.base.device->info, 14019166699) &&
(sbe_primitive_id_override(pipeline) ||
geom_or_tess_prim_id_used(pipeline));
anv_pipeline_emit(pipeline, partial.vfg, GENX(3DSTATE_VFG), vfg) {
/* Gfx12.5: If 3DSTATE_TE: TE Enable == 1 then RR_STRICT else RR_FREE */
vfg.DistributionMode =
#if GFX_VER < 20
!anv_pipeline_has_stage(pipeline, MESA_SHADER_TESS_EVAL) ? RR_FREE :
#endif
RR_STRICT;
vfg.DistributionGranularity = needs_instance_granularity ?
InstanceLevelGranularity : BatchLevelGranularity;
#if INTEL_WA_14014851047_GFX_VER
vfg.GranularityThresholdDisable =
intel_needs_workaround(pipeline->base.base.device->info, 14014851047);
#endif
/* 192 vertices for TRILIST_ADJ */
vfg.ListNBatchSizeScale = 0;
/* Batch size of 384 vertices */
vfg.List3BatchSizeScale = 2;
/* Batch size of 128 vertices */
vfg.List2BatchSizeScale = 1;
/* Batch size of 128 vertices */
vfg.List1BatchSizeScale = 2;
/* Batch size of 256 vertices for STRIP topologies */
vfg.StripBatchSizeScale = 3;
/* 192 control points for PATCHLIST_3 */
vfg.PatchBatchSizeScale = 1;
/* 192 control points for PATCHLIST_3 */
vfg.PatchBatchSizeMultiplier = 31;
}
#endif
if (anv_pipeline_is_primitive(pipeline)) {
emit_vertex_input(pipeline, state, state->vi);