mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-21 18:00:13 +01:00
brw: add support for no VF input slot compaction
Normally the driver & compiler work together to use as little 3DSTATE_VERTEX_ELEMENTS/VERTEX_BUFFER_ELEMENT data as possible. The compiler ignores unused bits and the driver avoids emitting the corresponding elements in 3DSTATE_VERTEX_ELEMENTS. For device generated commands, we want a 3DSTATE_VERTEX_ELEMENTS programming that is independent from the shader, so that we can implement indirect pipeline binding without complicating the generation shader, while also emitting fewer generated commands. Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com> Reviewed-by: Caio Oliveira <caio.oliveira@intel.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/32418>
This commit is contained in:
parent
f19c5f4fcc
commit
6845dede59
2 changed files with 49 additions and 20 deletions
|
|
@ -88,6 +88,27 @@ brw_nir_pack_vs_input(nir_shader *nir, struct brw_vs_prog_data *prog_data)
|
|||
}
|
||||
}
|
||||
|
||||
/* SKL PRMs, Vol 2a: Command Reference: Instructions,
|
||||
* 3DSTATE_VF_COMPONENT_PACKING:
|
||||
*
|
||||
* "At least one component of one "valid" Vertex Element must be
|
||||
* enabled."
|
||||
*/
|
||||
if (nir->info.inputs_read == 0) {
|
||||
if (prog_data->no_vf_slot_compaction) {
|
||||
attributes[VERT_ATTRIB_GENERIC0].is_used = true;
|
||||
attributes[VERT_ATTRIB_GENERIC0].component_mask = 0x1;
|
||||
} else if (!BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_IS_INDEXED_DRAW) &&
|
||||
!BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_FIRST_VERTEX) &&
|
||||
!BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_BASE_INSTANCE) &&
|
||||
!BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_VERTEX_ID_ZERO_BASE) &&
|
||||
!BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_INSTANCE_ID) &&
|
||||
!BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_DRAW_ID)) {
|
||||
attributes[VERT_ATTRIB_GENERIC0].is_used = true;
|
||||
attributes[VERT_ATTRIB_GENERIC0].component_mask = 0x1;
|
||||
}
|
||||
}
|
||||
|
||||
/* Compute the register offsets */
|
||||
unsigned reg_offset = 0;
|
||||
unsigned vertex_element = 0;
|
||||
|
|
@ -102,7 +123,8 @@ brw_nir_pack_vs_input(nir_shader *nir, struct brw_vs_prog_data *prog_data)
|
|||
* and therefore no packing is performed on these elements (if
|
||||
* Valid, all 4 components are stored)."
|
||||
*/
|
||||
if (vertex_element >= 32)
|
||||
if (vertex_element >= 32 ||
|
||||
(prog_data->no_vf_slot_compaction && a >= VERT_ATTRIB_GENERIC(32)))
|
||||
attributes[a].component_mask = 0xf;
|
||||
|
||||
attributes[a].reg_offset = reg_offset;
|
||||
|
|
@ -141,9 +163,10 @@ brw_nir_pack_vs_input(nir_shader *nir, struct brw_vs_prog_data *prog_data)
|
|||
}
|
||||
|
||||
/* Generate the packing array */
|
||||
unsigned vf_offset = 0;
|
||||
for (unsigned a = 0; a < ARRAY_SIZE(attributes) && vf_offset < 32; a++) {
|
||||
if (!attributes[a].is_used)
|
||||
unsigned vf_element_count = 0;
|
||||
for (unsigned a = 0; a < ARRAY_SIZE(attributes) && vf_element_count < 32; a++) {
|
||||
/* Consider all attributes used when no slot compaction is active */
|
||||
if (!attributes[a].is_used && !prog_data->no_vf_slot_compaction)
|
||||
continue;
|
||||
|
||||
uint32_t mask;
|
||||
|
|
@ -161,22 +184,11 @@ brw_nir_pack_vs_input(nir_shader *nir, struct brw_vs_prog_data *prog_data)
|
|||
}
|
||||
/* We should only have 4bits enabled max */
|
||||
assert((mask & ~0xfu) == 0);
|
||||
prog_data->vf_component_packing[vf_offset / 8] |=
|
||||
mask << (4 * (vf_offset % 8));
|
||||
vf_offset++;
|
||||
}
|
||||
|
||||
/* SKL PRMs, Vol 2a: Command Reference: Instructions,
|
||||
* 3DSTATE_VF_COMPONENT_PACKING:
|
||||
*
|
||||
* "At least one component of one "valid" Vertex Element must be
|
||||
* enabled."
|
||||
*/
|
||||
if (prog_data->vf_component_packing[0] == 0 &&
|
||||
prog_data->vf_component_packing[1] == 0 &&
|
||||
prog_data->vf_component_packing[2] == 0 &&
|
||||
prog_data->vf_component_packing[3] == 0)
|
||||
prog_data->vf_component_packing[0] = 0x1;
|
||||
prog_data->vf_component_packing[vf_element_count / 8] |=
|
||||
mask << (4 * (vf_element_count % 8));
|
||||
vf_element_count++;
|
||||
}
|
||||
|
||||
return reg_offset;
|
||||
}
|
||||
|
|
@ -225,6 +237,13 @@ brw_compile_vs(const struct brw_compiler *compiler,
|
|||
params->base.debug_flag : DEBUG_VS);
|
||||
const unsigned dispatch_width = brw_geometry_stage_dispatch_width(compiler->devinfo);
|
||||
|
||||
/* We only expect slot compaction to be disabled when using device
|
||||
* generated commands, to provide an independent 3DSTATE_VERTEX_ELEMENTS
|
||||
* programming. This should always be enabled together with VF component
|
||||
* packing to minimize the size of the payload.
|
||||
*/
|
||||
assert(!key->no_vf_slot_compaction || key->vf_component_packing);
|
||||
|
||||
prog_data->base.base.stage = MESA_SHADER_VERTEX;
|
||||
prog_data->base.base.ray_queries = nir->info.ray_queries;
|
||||
prog_data->base.base.total_scratch = 0;
|
||||
|
|
@ -233,6 +252,7 @@ brw_compile_vs(const struct brw_compiler *compiler,
|
|||
|
||||
prog_data->inputs_read = nir->info.inputs_read;
|
||||
prog_data->double_inputs_read = nir->info.vs.double_inputs;
|
||||
prog_data->no_vf_slot_compaction = key->no_vf_slot_compaction;
|
||||
|
||||
brw_nir_lower_vs_inputs(nir);
|
||||
brw_nir_lower_vue_outputs(nir);
|
||||
|
|
|
|||
|
|
@ -250,7 +250,15 @@ struct brw_vs_prog_key {
|
|||
*/
|
||||
bool vf_component_packing : 1;
|
||||
|
||||
uint32_t padding : 31;
|
||||
/** Prevent compaction of slots of VF inputs
|
||||
*
|
||||
* So that 3DSTATE_VERTEX_ELEMENTS programming remains independent of
|
||||
* shader inputs (essentially an unused location should have an associated
|
||||
* VERTEX_ELEMENT_STATE).
|
||||
*/
|
||||
bool no_vf_slot_compaction : 1;
|
||||
|
||||
uint32_t padding : 30;
|
||||
};
|
||||
|
||||
/** The program key for Tessellation Control Shaders. */
|
||||
|
|
@ -1046,6 +1054,7 @@ struct brw_vs_prog_data {
|
|||
bool uses_firstvertex;
|
||||
bool uses_baseinstance;
|
||||
bool uses_drawid;
|
||||
bool no_vf_slot_compaction;
|
||||
|
||||
uint32_t vf_component_packing[4];
|
||||
};
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue