From 6845dede599a6a8c9fab5772dec96e57c115bdcc Mon Sep 17 00:00:00 2001 From: Lionel Landwerlin Date: Mon, 2 Dec 2024 15:00:34 +0200 Subject: [PATCH] brw: add support for no VF input slot compaction Normally the driver & compiler work together to use as little 3DSTATE_VERTEX_ELEMENTS/VERTEX_BUFFER_ELEMENT data as possible. The compiler ignores unused bits and the driver avoids emitting the corresponding elements in 3DSTATE_VERTEX_ELEMENTS. For device generated commands, we want a 3DSTATE_VERTEX_ELEMENTS programming that is independent from the shader so that we can implement indirect pipeline binding without complicating the generation shader as well as emitting fewer generated commands. Signed-off-by: Lionel Landwerlin Reviewed-by: Caio Oliveira Part-of: --- src/intel/compiler/brw_compile_vs.cpp | 58 ++++++++++++++++++--------- src/intel/compiler/brw_compiler.h | 11 ++++- 2 files changed, 49 insertions(+), 20 deletions(-) diff --git a/src/intel/compiler/brw_compile_vs.cpp b/src/intel/compiler/brw_compile_vs.cpp index 4c1fb610dcc..814dd012df0 100644 --- a/src/intel/compiler/brw_compile_vs.cpp +++ b/src/intel/compiler/brw_compile_vs.cpp @@ -88,6 +88,27 @@ brw_nir_pack_vs_input(nir_shader *nir, struct brw_vs_prog_data *prog_data) } } + /* SKL PRMs, Vol 2a: Command Reference: Instructions, + * 3DSTATE_VF_COMPONENT_PACKING: + * + * "At least one component of one "valid" Vertex Element must be + * enabled." 
+ */ + if (nir->info.inputs_read == 0) { + if (prog_data->no_vf_slot_compaction) { + attributes[VERT_ATTRIB_GENERIC0].is_used = true; + attributes[VERT_ATTRIB_GENERIC0].component_mask = 0x1; + } else if (!BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_IS_INDEXED_DRAW) && + !BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_FIRST_VERTEX) && + !BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_BASE_INSTANCE) && + !BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_VERTEX_ID_ZERO_BASE) && + !BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_INSTANCE_ID) && + !BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_DRAW_ID)) { + attributes[VERT_ATTRIB_GENERIC0].is_used = true; + attributes[VERT_ATTRIB_GENERIC0].component_mask = 0x1; + } + } + /* Compute the register offsets */ unsigned reg_offset = 0; unsigned vertex_element = 0; @@ -102,7 +123,8 @@ brw_nir_pack_vs_input(nir_shader *nir, struct brw_vs_prog_data *prog_data) * and therefore no packing is performed on these elements (if * Valid, all 4 components are stored)." 
*/ - if (vertex_element >= 32) + if (vertex_element >= 32 || + (prog_data->no_vf_slot_compaction && a >= VERT_ATTRIB_GENERIC(32))) attributes[a].component_mask = 0xf; attributes[a].reg_offset = reg_offset; @@ -141,9 +163,10 @@ brw_nir_pack_vs_input(nir_shader *nir, struct brw_vs_prog_data *prog_data) } /* Generate the packing array */ - unsigned vf_offset = 0; - for (unsigned a = 0; a < ARRAY_SIZE(attributes) && vf_offset < 32; a++) { - if (!attributes[a].is_used) + unsigned vf_element_count = 0; + for (unsigned a = 0; a < ARRAY_SIZE(attributes) && vf_element_count < 32; a++) { + /* Consider all attributes used when no slot compaction is active */ + if (!attributes[a].is_used && !prog_data->no_vf_slot_compaction) continue; uint32_t mask; @@ -161,22 +184,11 @@ brw_nir_pack_vs_input(nir_shader *nir, struct brw_vs_prog_data *prog_data) } /* We should only have 4bits enabled max */ assert((mask & ~0xfu) == 0); - prog_data->vf_component_packing[vf_offset / 8] |= - mask << (4 * (vf_offset % 8)); - vf_offset++; - } - /* SKL PRMs, Vol 2a: Command Reference: Instructions, - * 3DSTATE_VF_COMPONENT_PACKING: - * - * "At least one component of one "valid" Vertex Element must be - * enabled." - */ - if (prog_data->vf_component_packing[0] == 0 && - prog_data->vf_component_packing[1] == 0 && - prog_data->vf_component_packing[2] == 0 && - prog_data->vf_component_packing[3] == 0) - prog_data->vf_component_packing[0] = 0x1; + prog_data->vf_component_packing[vf_element_count / 8] |= + mask << (4 * (vf_element_count % 8)); + vf_element_count++; + } return reg_offset; } @@ -225,6 +237,13 @@ brw_compile_vs(const struct brw_compiler *compiler, params->base.debug_flag : DEBUG_VS); const unsigned dispatch_width = brw_geometry_stage_dispatch_width(compiler->devinfo); + /* We only expect slot compaction to be disabled when using device + * generated commands, to provide an independent 3DSTATE_VERTEX_ELEMENTS + * programming. 
This should always be enabled together with VF component + * packing to minimize the size of the payload. + */ + assert(!key->no_vf_slot_compaction || key->vf_component_packing); + prog_data->base.base.stage = MESA_SHADER_VERTEX; prog_data->base.base.ray_queries = nir->info.ray_queries; prog_data->base.base.total_scratch = 0; @@ -233,6 +252,7 @@ brw_compile_vs(const struct brw_compiler *compiler, prog_data->inputs_read = nir->info.inputs_read; prog_data->double_inputs_read = nir->info.vs.double_inputs; + prog_data->no_vf_slot_compaction = key->no_vf_slot_compaction; brw_nir_lower_vs_inputs(nir); brw_nir_lower_vue_outputs(nir); diff --git a/src/intel/compiler/brw_compiler.h b/src/intel/compiler/brw_compiler.h index dcbec3ee3c0..53b6a5885dd 100644 --- a/src/intel/compiler/brw_compiler.h +++ b/src/intel/compiler/brw_compiler.h @@ -250,7 +250,15 @@ struct brw_vs_prog_key { */ bool vf_component_packing : 1; - uint32_t padding : 31; + /** Prevent compaction of slots of VF inputs + * + * So that 3DSTATE_VERTEX_ELEMENTS programming remains independent of + * shader inputs (essentially an unused location should have an associated + * VERTEX_ELEMENT_STATE). + */ + bool no_vf_slot_compaction : 1; + + uint32_t padding : 30; }; /** The program key for Tessellation Control Shaders. */ @@ -1046,6 +1054,7 @@ struct brw_vs_prog_data { bool uses_firstvertex; bool uses_baseinstance; bool uses_drawid; + bool no_vf_slot_compaction; uint32_t vf_component_packing[4]; };