mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-21 15:50:11 +01:00
brw: add support for no VF input slot compaction
Normally the driver & compiler work together to use as little 3DSTATE_VERTEX_ELEMENTS/VERTEX_BUFFER_ELEMENT data as possible. The compiler ignores unused bits and the driver avoids emitting the corresponding elements in 3DSTATE_VERTEX_ELEMENTS. For device generated commands, we want a 3DSTATE_VERTEX_ELEMENTS programming that is independent from the shader, so that we can implement indirect pipeline binding without complicating the generation shader, and so that we emit fewer generated commands. Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com> Reviewed-by: Caio Oliveira <caio.oliveira@intel.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/32418>
This commit is contained in:
parent
f19c5f4fcc
commit
6845dede59
2 changed files with 49 additions and 20 deletions
|
|
@ -88,6 +88,27 @@ brw_nir_pack_vs_input(nir_shader *nir, struct brw_vs_prog_data *prog_data)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* SKL PRMs, Vol 2a: Command Reference: Instructions,
|
||||||
|
* 3DSTATE_VF_COMPONENT_PACKING:
|
||||||
|
*
|
||||||
|
* "At least one component of one "valid" Vertex Element must be
|
||||||
|
* enabled."
|
||||||
|
*/
|
||||||
|
if (nir->info.inputs_read == 0) {
|
||||||
|
if (prog_data->no_vf_slot_compaction) {
|
||||||
|
attributes[VERT_ATTRIB_GENERIC0].is_used = true;
|
||||||
|
attributes[VERT_ATTRIB_GENERIC0].component_mask = 0x1;
|
||||||
|
} else if (!BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_IS_INDEXED_DRAW) &&
|
||||||
|
!BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_FIRST_VERTEX) &&
|
||||||
|
!BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_BASE_INSTANCE) &&
|
||||||
|
!BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_VERTEX_ID_ZERO_BASE) &&
|
||||||
|
!BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_INSTANCE_ID) &&
|
||||||
|
!BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_DRAW_ID)) {
|
||||||
|
attributes[VERT_ATTRIB_GENERIC0].is_used = true;
|
||||||
|
attributes[VERT_ATTRIB_GENERIC0].component_mask = 0x1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/* Compute the register offsets */
|
/* Compute the register offsets */
|
||||||
unsigned reg_offset = 0;
|
unsigned reg_offset = 0;
|
||||||
unsigned vertex_element = 0;
|
unsigned vertex_element = 0;
|
||||||
|
|
@ -102,7 +123,8 @@ brw_nir_pack_vs_input(nir_shader *nir, struct brw_vs_prog_data *prog_data)
|
||||||
* and therefore no packing is performed on these elements (if
|
* and therefore no packing is performed on these elements (if
|
||||||
* Valid, all 4 components are stored)."
|
* Valid, all 4 components are stored)."
|
||||||
*/
|
*/
|
||||||
if (vertex_element >= 32)
|
if (vertex_element >= 32 ||
|
||||||
|
(prog_data->no_vf_slot_compaction && a >= VERT_ATTRIB_GENERIC(32)))
|
||||||
attributes[a].component_mask = 0xf;
|
attributes[a].component_mask = 0xf;
|
||||||
|
|
||||||
attributes[a].reg_offset = reg_offset;
|
attributes[a].reg_offset = reg_offset;
|
||||||
|
|
@ -141,9 +163,10 @@ brw_nir_pack_vs_input(nir_shader *nir, struct brw_vs_prog_data *prog_data)
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Generate the packing array */
|
/* Generate the packing array */
|
||||||
unsigned vf_offset = 0;
|
unsigned vf_element_count = 0;
|
||||||
for (unsigned a = 0; a < ARRAY_SIZE(attributes) && vf_offset < 32; a++) {
|
for (unsigned a = 0; a < ARRAY_SIZE(attributes) && vf_element_count < 32; a++) {
|
||||||
if (!attributes[a].is_used)
|
/* Consider all attributes used when no slot compaction is active */
|
||||||
|
if (!attributes[a].is_used && !prog_data->no_vf_slot_compaction)
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
uint32_t mask;
|
uint32_t mask;
|
||||||
|
|
@ -161,22 +184,11 @@ brw_nir_pack_vs_input(nir_shader *nir, struct brw_vs_prog_data *prog_data)
|
||||||
}
|
}
|
||||||
/* We should only have 4bits enabled max */
|
/* We should only have 4bits enabled max */
|
||||||
assert((mask & ~0xfu) == 0);
|
assert((mask & ~0xfu) == 0);
|
||||||
prog_data->vf_component_packing[vf_offset / 8] |=
|
|
||||||
mask << (4 * (vf_offset % 8));
|
|
||||||
vf_offset++;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* SKL PRMs, Vol 2a: Command Reference: Instructions,
|
prog_data->vf_component_packing[vf_element_count / 8] |=
|
||||||
* 3DSTATE_VF_COMPONENT_PACKING:
|
mask << (4 * (vf_element_count % 8));
|
||||||
*
|
vf_element_count++;
|
||||||
* "At least one component of one "valid" Vertex Element must be
|
}
|
||||||
* enabled."
|
|
||||||
*/
|
|
||||||
if (prog_data->vf_component_packing[0] == 0 &&
|
|
||||||
prog_data->vf_component_packing[1] == 0 &&
|
|
||||||
prog_data->vf_component_packing[2] == 0 &&
|
|
||||||
prog_data->vf_component_packing[3] == 0)
|
|
||||||
prog_data->vf_component_packing[0] = 0x1;
|
|
||||||
|
|
||||||
return reg_offset;
|
return reg_offset;
|
||||||
}
|
}
|
||||||
|
|
@ -225,6 +237,13 @@ brw_compile_vs(const struct brw_compiler *compiler,
|
||||||
params->base.debug_flag : DEBUG_VS);
|
params->base.debug_flag : DEBUG_VS);
|
||||||
const unsigned dispatch_width = brw_geometry_stage_dispatch_width(compiler->devinfo);
|
const unsigned dispatch_width = brw_geometry_stage_dispatch_width(compiler->devinfo);
|
||||||
|
|
||||||
|
/* We only expect slot compaction to be disabled when using device
|
||||||
|
* generated commands, to provide an independent 3DSTATE_VERTEX_ELEMENTS
|
||||||
|
* programming. This should always be enabled together with VF component
|
||||||
|
* packing to minimize the size of the payload.
|
||||||
|
*/
|
||||||
|
assert(!key->no_vf_slot_compaction || key->vf_component_packing);
|
||||||
|
|
||||||
prog_data->base.base.stage = MESA_SHADER_VERTEX;
|
prog_data->base.base.stage = MESA_SHADER_VERTEX;
|
||||||
prog_data->base.base.ray_queries = nir->info.ray_queries;
|
prog_data->base.base.ray_queries = nir->info.ray_queries;
|
||||||
prog_data->base.base.total_scratch = 0;
|
prog_data->base.base.total_scratch = 0;
|
||||||
|
|
@ -233,6 +252,7 @@ brw_compile_vs(const struct brw_compiler *compiler,
|
||||||
|
|
||||||
prog_data->inputs_read = nir->info.inputs_read;
|
prog_data->inputs_read = nir->info.inputs_read;
|
||||||
prog_data->double_inputs_read = nir->info.vs.double_inputs;
|
prog_data->double_inputs_read = nir->info.vs.double_inputs;
|
||||||
|
prog_data->no_vf_slot_compaction = key->no_vf_slot_compaction;
|
||||||
|
|
||||||
brw_nir_lower_vs_inputs(nir);
|
brw_nir_lower_vs_inputs(nir);
|
||||||
brw_nir_lower_vue_outputs(nir);
|
brw_nir_lower_vue_outputs(nir);
|
||||||
|
|
|
||||||
|
|
@ -250,7 +250,15 @@ struct brw_vs_prog_key {
|
||||||
*/
|
*/
|
||||||
bool vf_component_packing : 1;
|
bool vf_component_packing : 1;
|
||||||
|
|
||||||
uint32_t padding : 31;
|
/** Prevent compaction of slots of VF inputs
|
||||||
|
*
|
||||||
|
* So that 3DSTATE_VERTEX_ELEMENTS programming remains independent of
|
||||||
|
* shader inputs (essentially an unused location should have an associated
|
||||||
|
* VERTEX_ELEMENT_STATE).
|
||||||
|
*/
|
||||||
|
bool no_vf_slot_compaction : 1;
|
||||||
|
|
||||||
|
uint32_t padding : 30;
|
||||||
};
|
};
|
||||||
|
|
||||||
/** The program key for Tessellation Control Shaders. */
|
/** The program key for Tessellation Control Shaders. */
|
||||||
|
|
@ -1046,6 +1054,7 @@ struct brw_vs_prog_data {
|
||||||
bool uses_firstvertex;
|
bool uses_firstvertex;
|
||||||
bool uses_baseinstance;
|
bool uses_baseinstance;
|
||||||
bool uses_drawid;
|
bool uses_drawid;
|
||||||
|
bool no_vf_slot_compaction;
|
||||||
|
|
||||||
uint32_t vf_component_packing[4];
|
uint32_t vf_component_packing[4];
|
||||||
};
|
};
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue