anv: enable vertex fetching component packing

DG2 a/b testing:
   Borderlands3  -0.55%
   Cyberpunk     +0.38%
   Superposition -0.67%

The shader stats mostly don't look like an improvement:

DG2 shader stats:

  Blackops 3:
  Totals from 265 (16.44% of 1612) affected shaders:
  Instrs: 109055 -> 109080 (+0.02%); split: -0.01%, +0.04%
  Cycle count: 6166549 -> 6021371 (-2.35%); split: -2.53%, +0.17%

  Cyberpunk 2077:
  Totals from 297 (23.50% of 1264) affected shaders:
  Instrs: 197305 -> 197297 (-0.00%); split: -0.03%, +0.02%
  Cycle count: 3374325 -> 3356562 (-0.53%); split: -1.23%, +0.70%

  Fortnite:
  Totals from 2090 (27.97% of 7471) affected shaders:
  Instrs: 1777944 -> 1781070 (+0.18%); split: -0.01%, +0.18%
  Cycle count: 25188758 -> 25162910 (-0.10%); split: -0.86%, +0.76%
  Spill count: 1439 -> 1729 (+20.15%); split: -0.69%, +20.85%
  Fill count: 1226 -> 1395 (+13.78%); split: -0.82%, +14.60%
  Scratch Memory Size: 122880 -> 138240 (+12.50%); split: -1.67%, +14.17%

  Hitman 3:
  Totals from 490 (9.09% of 5392) affected shaders:
  Instrs: 407489 -> 407486 (-0.00%); split: -0.00%, +0.00%
  Cycle count: 1831149 -> 1831890 (+0.04%); split: -0.33%, +0.38%

  Metro Exodus:
  Totals from 4169 (9.68% of 43076) affected shaders:
  Instrs: 817730 -> 817726 (-0.00%); split: -0.00%, +0.00%
  Cycle count: 4646954 -> 4641559 (-0.12%); split: -0.61%, +0.50%

Xe2 shader stats:

  Blackops 3:
  Totals from 283 (19.46% of 1454) affected shaders:
  Cycle count: 7662980 -> 7916316 (+3.31%); split: -0.38%, +3.69%

  Cyberpunk 2077:
  Totals from 329 (26.79% of 1228) affected shaders:
  Instrs: 203312 -> 203327 (+0.01%); split: -0.01%, +0.02%
  Cycle count: 4415812 -> 4434906 (+0.43%); split: -0.69%, +1.12%

  Fortnite:
  Totals from 1981 (30.18% of 6565) affected shaders:
  Instrs: 1709583 -> 1711379 (+0.11%); split: -0.00%, +0.11%
  Cycle count: 26882682 -> 26914014 (+0.12%); split: -0.66%, +0.78%
  Spill count: 863 -> 1020 (+18.19%)
  Fill count: 1195 -> 1271 (+6.36%)
  Scratch Memory Size: 116736 -> 122880 (+5.26%)

  Hitman 3:
  Totals from 540 (10.56% of 5115) affected shaders:
  Instrs: 478993 -> 478994 (+0.00%)
  Cycle count: 3198740 -> 3198416 (-0.01%); split: -0.27%, +0.26%

  Metro Exodus:
  Totals from 4554 (12.28% of 37071) affected shaders:
  Cycle count: 6460340 -> 6475666 (+0.24%); split: -0.38%, +0.62%

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Reviewed-by: Caio Oliveira <caio.oliveira@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/32418>
This commit is contained in:
Lionel Landwerlin 2024-11-29 12:38:27 +02:00 committed by Marge Bot
parent 462d8e3fab
commit a85717f313
10 changed files with 58 additions and 1 deletions

View file

@ -159,6 +159,11 @@ anv_blorp_batch_init(struct anv_cmd_buffer *cmd_buffer,
assert((flags & BLORP_BATCH_USE_BLITTER) == 0 ||
(flags & BLORP_BATCH_USE_COMPUTE) == 0);
/* If blorp needs a VS shader, we can't have the component packing of the
* driver interfere with blorp's shader.
*/
flags |= BLORP_BATCH_EMIT_3DSTATE_VF;
blorp_batch_init(&cmd_buffer->device->blorp.context, batch, cmd_buffer, flags);
}

View file

@ -551,6 +551,7 @@ anv_cmd_buffer_flush_pipeline_state(struct anv_cmd_buffer *cmd_buffer,
diff_fix_state(VF_SGVS, final.vf_sgvs);
if (cmd_buffer->device->info->ver >= 11)
diff_fix_state(VF_SGVS_2, final.vf_sgvs_2);
diff_fix_state(VF_COMPONENT_PACKING, final.vf_component_packing);
if (cmd_buffer->device->info->ver >= 12)
diff_fix_state(PRIMITIVE_REPLICATION, final.primitive_replication);
diff_fix_state(SBE, final.sbe);

View file

@ -59,6 +59,7 @@ static const driOptionDescription anv_dri_options[] = {
#else
DRI_CONF_VK_REQUIRE_ASTC(false)
#endif
DRI_CONF_ANV_VF_COMPONENT_PACKING(true)
DRI_CONF_SECTION_END
DRI_CONF_SECTION_QUALITY
@ -186,6 +187,8 @@ anv_init_dri_options(struct anv_instance *instance)
instance->custom_border_colors_without_format =
driQueryOptionb(&instance->dri_options,
"custom_border_colors_without_format");
instance->vf_component_packing =
driQueryOptionb(&instance->dri_options, "anv_vf_component_packing");
instance->stack_ids = driQueryOptioni(&instance->dri_options, "intel_stack_id");
switch (instance->stack_ids) {

View file

@ -332,6 +332,9 @@ populate_vs_prog_key(struct anv_pipeline_stage *stage,
memset(&stage->key, 0, sizeof(stage->key));
populate_base_prog_key(stage, device);
stage->key.vs.vf_component_packing =
device->physical->instance->vf_component_packing;
}
static void

View file

@ -1319,6 +1319,7 @@ struct anv_instance {
bool anv_fake_nonlocal_memory;
bool anv_upper_bound_descriptor_pool_sampler;
bool custom_border_colors_without_format;
bool vf_component_packing;
/* HW workarounds */
bool no_16bit;
@ -1444,6 +1445,7 @@ enum anv_gfx_state_bits {
ANV_GFX_STATE_VF_SGVS_2,
ANV_GFX_STATE_VF_SGVS_VI, /* 3DSTATE_VERTEX_ELEMENTS for sgvs elements */
ANV_GFX_STATE_VF_SGVS_INSTANCING, /* 3DSTATE_VF_INSTANCING for sgvs elements */
ANV_GFX_STATE_VF_COMPONENT_PACKING,
ANV_GFX_STATE_PRIMITIVE_REPLICATION,
ANV_GFX_STATE_SBE,
ANV_GFX_STATE_SBE_SWIZ,
@ -4945,6 +4947,7 @@ struct anv_graphics_pipeline {
struct anv_gfx_state_ptr vf_sgvs_2;
struct anv_gfx_state_ptr vf_sgvs_instancing;
struct anv_gfx_state_ptr vf_instancing;
struct anv_gfx_state_ptr vf_component_packing;
struct anv_gfx_state_ptr primitive_replication;
struct anv_gfx_state_ptr sbe;
struct anv_gfx_state_ptr sbe_swiz;

View file

@ -2174,6 +2174,12 @@ cmd_buffer_gfx_state_emission(struct anv_cmd_buffer *cmd_buffer)
anv_batch_emit_pipeline_state(&cmd_buffer->batch, pipeline, final.vf_sgvs_2);
#endif
if (device->physical->instance->vf_component_packing &&
BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_VF_COMPONENT_PACKING)) {
anv_batch_emit_pipeline_state(&cmd_buffer->batch, pipeline,
final.vf_component_packing);
}
if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_VS)) {
anv_batch_emit_pipeline_state_protected(&cmd_buffer->batch, pipeline,
final.vs, protected);
@ -2659,6 +2665,8 @@ cmd_buffer_gfx_state_emission(struct anv_cmd_buffer *cmd_buffer)
#if GFX_VERx10 >= 125
vf.GeometryDistributionEnable = true;
#endif
vf.ComponentPackingEnable =
device->physical->instance->vf_component_packing;
SET(vf, vf, IndexedDrawCutIndexEnable);
SET(vf, vf, CutIndex);
}

View file

@ -78,6 +78,14 @@ emit_common_so_memcpy(struct anv_memcpy_state *state,
vfi.VertexElementIndex = 0;
}
anv_batch_emit(batch, GENX(3DSTATE_VF_STATISTICS), vfs);
anv_batch_emit(batch, GENX(3DSTATE_VF), vf) {
#if GFX_VERx10 >= 125
/* Memcpy has no requirement that we need to disable geometry
* distribution.
*/
vf.GeometryDistributionEnable = true;
#endif
}
anv_batch_emit(batch, GENX(3DSTATE_VF_SGVS), sgvs);
#if GFX_VER >= 11
anv_batch_emit(batch, GENX(3DSTATE_VF_SGVS_2), sgvs);

View file

@ -432,6 +432,16 @@ emit_vertex_input(struct anv_graphics_pipeline *pipeline,
sgvs.XP2ElementOffset = drawid_slot;
}
#endif
if (pipeline->base.base.device->physical->instance->vf_component_packing) {
anv_pipeline_emit(pipeline, final.vf_component_packing,
GENX(3DSTATE_VF_COMPONENT_PACKING), vfc) {
vfc.VertexElementEnablesDW[0] = vs_prog_data->vf_component_packing[0];
vfc.VertexElementEnablesDW[1] = vs_prog_data->vf_component_packing[1];
vfc.VertexElementEnablesDW[2] = vs_prog_data->vf_component_packing[2];
vfc.VertexElementEnablesDW[3] = vs_prog_data->vf_component_packing[3];
}
}
}
void
@ -2024,6 +2034,10 @@ genX(graphics_pipeline_emit)(struct anv_graphics_pipeline *pipeline,
#if GFX_VER >= 11
anv_pipeline_emit(pipeline, final.vf_sgvs_2, GENX(3DSTATE_VF_SGVS_2), sgvs);
#endif
if (pipeline->base.base.device->physical->instance->vf_component_packing) {
anv_pipeline_emit(pipeline, final.vf_component_packing,
GENX(3DSTATE_VF_COMPONENT_PACKING), vfc);
}
anv_pipeline_emit(pipeline, final.vs, GENX(3DSTATE_VS), vs);
anv_pipeline_emit(pipeline, final.hs, GENX(3DSTATE_HS), hs);
anv_pipeline_emit(pipeline, final.ds, GENX(3DSTATE_DS), ds);

View file

@ -77,7 +77,15 @@ genX(emit_simpler_shader_init_fragment)(struct anv_simple_shader *state)
.Component3Control = VFCOMP_STORE_1_FP,
});
anv_batch_emit(batch, GENX(3DSTATE_VF_STATISTICS), vf);
anv_batch_emit(batch, GENX(3DSTATE_VF_STATISTICS), vfs);
anv_batch_emit(batch, GENX(3DSTATE_VF), vf) {
#if GFX_VERx10 >= 125
/* Simple shaders have no requirement that we need to disable geometry
* distribution.
*/
vf.GeometryDistributionEnable = true;
#endif
}
anv_batch_emit(batch, GENX(3DSTATE_VF_SGVS), sgvs) {
sgvs.InstanceIDEnable = true;
sgvs.InstanceIDComponentNumber = COMP_1;

View file

@ -857,6 +857,10 @@
DRI_CONF_OPT_B(anv_upper_bound_descriptor_pool_sampler, def, \
"Overallocate samplers in descriptor pools to workaround app bug")
/* Declares the "anv_vf_component_packing" driconf option, with `def` as its
 * default value. ANV reads it back as a boolean (driQueryOptionb) to decide
 * whether to use vertex fetching component packing.
 */
#define DRI_CONF_ANV_VF_COMPONENT_PACKING(def) \
DRI_CONF_OPT_B(anv_vf_component_packing, def, \
"Vertex fetching component packing")
/**
* \brief HASVK specific configuration options
*/