mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-04 20:38:06 +02:00
radv: cleanup computing the workgroup size for all stages
Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com> Reviewed-by: Timur Kristóf <timur.kristof@gmail.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/18278>
This commit is contained in:
parent
5082918d65
commit
60a7115b4e
2 changed files with 58 additions and 63 deletions
|
|
@ -2892,41 +2892,6 @@ radv_fill_shader_info(struct radv_pipeline *pipeline,
|
|||
}
|
||||
|
||||
radv_nir_shader_info_link(device, pipeline_key, stages, pipeline_has_ngg, last_vgt_api_stage);
|
||||
|
||||
if (stages[MESA_SHADER_TESS_CTRL].nir) {
|
||||
for (gl_shader_stage s = MESA_SHADER_VERTEX; s <= MESA_SHADER_TESS_CTRL; ++s) {
|
||||
stages[s].info.workgroup_size =
|
||||
ac_compute_lshs_workgroup_size(device->physical_device->rad_info.gfx_level, s,
|
||||
stages[MESA_SHADER_TESS_CTRL].info.num_tess_patches,
|
||||
pipeline_key->tcs.tess_input_vertices,
|
||||
stages[MESA_SHADER_TESS_CTRL].info.tcs.tcs_vertices_out);
|
||||
}
|
||||
}
|
||||
|
||||
/* PS always operates without workgroups. */
|
||||
if (stages[MESA_SHADER_FRAGMENT].nir)
|
||||
stages[MESA_SHADER_FRAGMENT].info.workgroup_size = stages[MESA_SHADER_FRAGMENT].info.wave_size;
|
||||
|
||||
if (stages[MESA_SHADER_COMPUTE].nir) {
|
||||
/* Variable workgroup size is not supported by Vulkan. */
|
||||
assert(!stages[MESA_SHADER_COMPUTE].nir->info.workgroup_size_variable);
|
||||
|
||||
stages[MESA_SHADER_COMPUTE].info.workgroup_size =
|
||||
ac_compute_cs_workgroup_size(
|
||||
stages[MESA_SHADER_COMPUTE].nir->info.workgroup_size, false, UINT32_MAX);
|
||||
}
|
||||
|
||||
if (stages[MESA_SHADER_TASK].nir) {
|
||||
stages[MESA_SHADER_TASK].info.workgroup_size =
|
||||
ac_compute_cs_workgroup_size(
|
||||
stages[MESA_SHADER_TASK].nir->info.workgroup_size, false, UINT32_MAX);
|
||||
}
|
||||
|
||||
if (!pipeline_has_ngg && !stages[MESA_SHADER_GEOMETRY].nir) {
|
||||
gl_shader_stage hw_vs_api_stage =
|
||||
stages[MESA_SHADER_TESS_EVAL].nir ? MESA_SHADER_TESS_EVAL : MESA_SHADER_VERTEX;
|
||||
stages[hw_vs_api_stage].info.workgroup_size = stages[hw_vs_api_stage].info.wave_size;
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
|
|
|
|||
|
|
@ -796,6 +796,23 @@ radv_nir_shader_info_pass(struct radv_device *device, const struct nir_shader *n
|
|||
|
||||
info->wave_size = radv_get_wave_size(device, nir->info.stage, info);
|
||||
info->ballot_bit_size = radv_get_ballot_bit_size(device, nir->info.stage, info);
|
||||
|
||||
switch (nir->info.stage) {
|
||||
case MESA_SHADER_COMPUTE:
|
||||
case MESA_SHADER_TASK:
|
||||
info->workgroup_size =
|
||||
ac_compute_cs_workgroup_size(nir->info.workgroup_size, false, UINT32_MAX);
|
||||
break;
|
||||
case MESA_SHADER_MESH:
|
||||
/* Already computed in gather_shader_info_mesh(). */
|
||||
break;
|
||||
default:
|
||||
/* FS always operates without workgroups. Other stages are computed during linking but assume
|
||||
* no workgroups by default.
|
||||
*/
|
||||
info->workgroup_size = info->wave_size;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
|
|
@ -1273,39 +1290,52 @@ radv_link_shaders_info(struct radv_device *device,
|
|||
}
|
||||
}
|
||||
|
||||
if (producer->stage == MESA_SHADER_VERTEX && consumer->stage == MESA_SHADER_TESS_CTRL &&
|
||||
!radv_use_llvm_for_stage(device, MESA_SHADER_VERTEX)) {
|
||||
if (producer->stage == MESA_SHADER_VERTEX && consumer->stage == MESA_SHADER_TESS_CTRL) {
|
||||
struct radv_pipeline_stage *vs_stage = producer;
|
||||
struct radv_pipeline_stage *tcs_stage = consumer;
|
||||
|
||||
/* When the number of TCS input and output vertices are the same (typically 3):
|
||||
* - There is an equal amount of LS and HS invocations
|
||||
* - In case of merged LSHS shaders, the LS and HS halves of the shader always process the
|
||||
* exact same vertex. We can use this knowledge to optimize them.
|
||||
*
|
||||
* We don't set tcs_in_out_eq if the float controls differ because that might involve
|
||||
* different float modes for the same block and our optimizer doesn't handle a instruction
|
||||
* dominating another with a different mode.
|
||||
*/
|
||||
vs_stage->info.vs.tcs_in_out_eq =
|
||||
device->physical_device->rad_info.gfx_level >= GFX9 &&
|
||||
pipeline_key->tcs.tess_input_vertices == tcs_stage->info.tcs.tcs_vertices_out &&
|
||||
vs_stage->nir->info.float_controls_execution_mode ==
|
||||
tcs_stage->nir->info.float_controls_execution_mode;
|
||||
vs_stage->info.workgroup_size =
|
||||
ac_compute_lshs_workgroup_size(device->physical_device->rad_info.gfx_level,
|
||||
MESA_SHADER_VERTEX, tcs_stage->info.num_tess_patches,
|
||||
pipeline_key->tcs.tess_input_vertices,
|
||||
tcs_stage->info.tcs.tcs_vertices_out);
|
||||
|
||||
if (vs_stage->info.vs.tcs_in_out_eq)
|
||||
vs_stage->info.vs.tcs_temp_only_input_mask =
|
||||
tcs_stage->nir->info.inputs_read &
|
||||
vs_stage->nir->info.outputs_written &
|
||||
~tcs_stage->nir->info.tess.tcs_cross_invocation_inputs_read &
|
||||
~tcs_stage->nir->info.inputs_read_indirectly &
|
||||
~vs_stage->nir->info.outputs_accessed_indirectly;
|
||||
tcs_stage->info.workgroup_size =
|
||||
ac_compute_lshs_workgroup_size(device->physical_device->rad_info.gfx_level,
|
||||
MESA_SHADER_TESS_CTRL, tcs_stage->info.num_tess_patches,
|
||||
pipeline_key->tcs.tess_input_vertices,
|
||||
tcs_stage->info.tcs.tcs_vertices_out);
|
||||
|
||||
/* Copy data to TCS so it can be accessed by the backend if they are merged. */
|
||||
tcs_stage->info.vs.tcs_in_out_eq =
|
||||
vs_stage->info.vs.tcs_in_out_eq;
|
||||
tcs_stage->info.vs.tcs_temp_only_input_mask =
|
||||
vs_stage->info.vs.tcs_temp_only_input_mask;
|
||||
if (!radv_use_llvm_for_stage(device, MESA_SHADER_VERTEX)) {
|
||||
/* When the number of TCS input and output vertices are the same (typically 3):
|
||||
* - There is an equal amount of LS and HS invocations
|
||||
* - In case of merged LSHS shaders, the LS and HS halves of the shader always process the
|
||||
* exact same vertex. We can use this knowledge to optimize them.
|
||||
*
|
||||
* We don't set tcs_in_out_eq if the float controls differ because that might involve
|
||||
* different float modes for the same block and our optimizer doesn't handle a instruction
|
||||
* dominating another with a different mode.
|
||||
*/
|
||||
vs_stage->info.vs.tcs_in_out_eq =
|
||||
device->physical_device->rad_info.gfx_level >= GFX9 &&
|
||||
pipeline_key->tcs.tess_input_vertices == tcs_stage->info.tcs.tcs_vertices_out &&
|
||||
vs_stage->nir->info.float_controls_execution_mode ==
|
||||
tcs_stage->nir->info.float_controls_execution_mode;
|
||||
|
||||
if (vs_stage->info.vs.tcs_in_out_eq)
|
||||
vs_stage->info.vs.tcs_temp_only_input_mask =
|
||||
tcs_stage->nir->info.inputs_read &
|
||||
vs_stage->nir->info.outputs_written &
|
||||
~tcs_stage->nir->info.tess.tcs_cross_invocation_inputs_read &
|
||||
~tcs_stage->nir->info.inputs_read_indirectly &
|
||||
~vs_stage->nir->info.outputs_accessed_indirectly;
|
||||
|
||||
/* Copy data to TCS so it can be accessed by the backend if they are merged. */
|
||||
tcs_stage->info.vs.tcs_in_out_eq =
|
||||
vs_stage->info.vs.tcs_in_out_eq;
|
||||
tcs_stage->info.vs.tcs_temp_only_input_mask =
|
||||
vs_stage->info.vs.tcs_temp_only_input_mask;
|
||||
}
|
||||
}
|
||||
|
||||
/* Copy shader info between TCS<->TES. */
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue