brw: add support for no VF input slot compaction

Normally the driver & compiler work together to use as little
3DSTATE_VERTEX_ELEMENTS/VERTEX_BUFFER_ELEMENT data as possible.

The compiler ignores unused bits and the driver avoids emitting the
corresponding elements in 3DSTATE_VERTEX_ELEMENTS.

For device generated commands, we want a 3DSTATE_VERTEX_ELEMENTS
programming that is independent from the shader so that we can
implement indirect pipeline binding without complicating the
generation shader as well as emitting fewer generated commands.

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Reviewed-by: Caio Oliveira <caio.oliveira@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/32418>
This commit is contained in:
Lionel Landwerlin 2024-12-02 15:00:34 +02:00 committed by Marge Bot
parent f19c5f4fcc
commit 6845dede59
2 changed files with 49 additions and 20 deletions

View file

@ -88,6 +88,27 @@ brw_nir_pack_vs_input(nir_shader *nir, struct brw_vs_prog_data *prog_data)
}
}
/* SKL PRMs, Vol 2a: Command Reference: Instructions,
* 3DSTATE_VF_COMPONENT_PACKING:
*
* "At least one component of one "valid" Vertex Element must be
* enabled."
*/
if (nir->info.inputs_read == 0) {
if (prog_data->no_vf_slot_compaction) {
attributes[VERT_ATTRIB_GENERIC0].is_used = true;
attributes[VERT_ATTRIB_GENERIC0].component_mask = 0x1;
} else if (!BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_IS_INDEXED_DRAW) &&
!BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_FIRST_VERTEX) &&
!BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_BASE_INSTANCE) &&
!BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_VERTEX_ID_ZERO_BASE) &&
!BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_INSTANCE_ID) &&
!BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_DRAW_ID)) {
attributes[VERT_ATTRIB_GENERIC0].is_used = true;
attributes[VERT_ATTRIB_GENERIC0].component_mask = 0x1;
}
}
/* Compute the register offsets */
unsigned reg_offset = 0;
unsigned vertex_element = 0;
@ -102,7 +123,8 @@ brw_nir_pack_vs_input(nir_shader *nir, struct brw_vs_prog_data *prog_data)
* and therefore no packing is performed on these elements (if
* Valid, all 4 components are stored)."
*/
if (vertex_element >= 32)
if (vertex_element >= 32 ||
(prog_data->no_vf_slot_compaction && a >= VERT_ATTRIB_GENERIC(32)))
attributes[a].component_mask = 0xf;
attributes[a].reg_offset = reg_offset;
@ -141,9 +163,10 @@ brw_nir_pack_vs_input(nir_shader *nir, struct brw_vs_prog_data *prog_data)
}
/* Generate the packing array */
unsigned vf_offset = 0;
for (unsigned a = 0; a < ARRAY_SIZE(attributes) && vf_offset < 32; a++) {
if (!attributes[a].is_used)
unsigned vf_element_count = 0;
for (unsigned a = 0; a < ARRAY_SIZE(attributes) && vf_element_count < 32; a++) {
/* Consider all attributes used when no slot compaction is active */
if (!attributes[a].is_used && !prog_data->no_vf_slot_compaction)
continue;
uint32_t mask;
@ -161,22 +184,11 @@ brw_nir_pack_vs_input(nir_shader *nir, struct brw_vs_prog_data *prog_data)
}
/* We should only have 4bits enabled max */
assert((mask & ~0xfu) == 0);
prog_data->vf_component_packing[vf_offset / 8] |=
mask << (4 * (vf_offset % 8));
vf_offset++;
}
/* SKL PRMs, Vol 2a: Command Reference: Instructions,
* 3DSTATE_VF_COMPONENT_PACKING:
*
* "At least one component of one "valid" Vertex Element must be
* enabled."
*/
if (prog_data->vf_component_packing[0] == 0 &&
prog_data->vf_component_packing[1] == 0 &&
prog_data->vf_component_packing[2] == 0 &&
prog_data->vf_component_packing[3] == 0)
prog_data->vf_component_packing[0] = 0x1;
prog_data->vf_component_packing[vf_element_count / 8] |=
mask << (4 * (vf_element_count % 8));
vf_element_count++;
}
return reg_offset;
}
@ -225,6 +237,13 @@ brw_compile_vs(const struct brw_compiler *compiler,
params->base.debug_flag : DEBUG_VS);
const unsigned dispatch_width = brw_geometry_stage_dispatch_width(compiler->devinfo);
/* We only expect slot compaction to be disabled when using device
* generated commands, to provide an independent 3DSTATE_VERTEX_ELEMENTS
* programming. This should always be enabled together with VF component
* packing to minimize the size of the payload.
*/
assert(!key->no_vf_slot_compaction || key->vf_component_packing);
prog_data->base.base.stage = MESA_SHADER_VERTEX;
prog_data->base.base.ray_queries = nir->info.ray_queries;
prog_data->base.base.total_scratch = 0;
@ -233,6 +252,7 @@ brw_compile_vs(const struct brw_compiler *compiler,
prog_data->inputs_read = nir->info.inputs_read;
prog_data->double_inputs_read = nir->info.vs.double_inputs;
prog_data->no_vf_slot_compaction = key->no_vf_slot_compaction;
brw_nir_lower_vs_inputs(nir);
brw_nir_lower_vue_outputs(nir);

View file

@ -250,7 +250,15 @@ struct brw_vs_prog_key {
*/
bool vf_component_packing : 1;
uint32_t padding : 31;
/** Prevent compaction of slots of VF inputs
*
* So that 3DSTATE_VERTEX_ELEMENTS programming remains independent of
* shader inputs (essentially an unused location should have an associated
* VERTEX_ELEMENT_STATE).
*/
bool no_vf_slot_compaction : 1;
uint32_t padding : 30;
};
/** The program key for Tessellation Control Shaders. */
@ -1046,6 +1054,7 @@ struct brw_vs_prog_data {
bool uses_firstvertex;
bool uses_baseinstance;
bool uses_drawid;
bool no_vf_slot_compaction;
uint32_t vf_component_packing[4];
};