mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-01-26 01:20:22 +01:00
radeonsi: lower compute system values later
Acked-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/38802>
This commit is contained in:
parent
3cc5517925
commit
8ceef4e3d4
2 changed files with 45 additions and 45 deletions
|
|
@ -638,6 +638,51 @@ static void run_pre_link_optimization_passes(struct si_nir_shader_ctx *ctx)
|
|||
nir_shader *nir = ctx->nir;
|
||||
bool progress = false;
|
||||
|
||||
if (mesa_shader_stage_is_compute(nir->info.stage)) {
|
||||
/* gl_LocalInvocationIndex must be derived from gl_LocalInvocationID.xyz to make it correct
|
||||
* with quad derivatives. Using gl_SubgroupID for that (which is what we do by default) is
|
||||
* incorrect with a non-linear thread order.
|
||||
*
|
||||
* On Gfx12, we always use a non-linear thread order if the workgroup X and Y size is
|
||||
* divisible by 2.
|
||||
*/
|
||||
NIR_PASS(progress, nir, nir_lower_compute_system_values,
|
||||
&(nir_lower_compute_system_values_options){
|
||||
.lower_local_invocation_index =
|
||||
nir->info.derivative_group == DERIVATIVE_GROUP_QUADS ||
|
||||
(sel->screen->info.gfx_level >= GFX12 &&
|
||||
nir->info.derivative_group == DERIVATIVE_GROUP_NONE &&
|
||||
(nir->info.workgroup_size_variable ||
|
||||
(nir->info.workgroup_size[0] % 2 == 0 && nir->info.workgroup_size[1] % 2 == 0)))
|
||||
});
|
||||
|
||||
/* Gfx12 supports this in hw. */
|
||||
if (sel->screen->info.gfx_level < GFX12 &&
|
||||
nir->info.derivative_group == DERIVATIVE_GROUP_QUADS) {
|
||||
NIR_PASS(progress, nir, nir_opt_cse); /* CSE load_local_invocation_id */
|
||||
NIR_PASS(progress, nir, nir_lower_compute_system_values,
|
||||
&(nir_lower_compute_system_values_options){
|
||||
.shuffle_local_ids_for_quad_derivatives = true,
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
if (nir->info.stage == MESA_SHADER_MESH && !sel->screen->info.mesh_fast_launch_2) {
|
||||
NIR_PASS(progress, nir, nir_lower_compute_system_values,
|
||||
&(nir_lower_compute_system_values_options){
|
||||
/* Mesh shaders run as NGG which can implement local_invocation_index from
|
||||
* the wave ID in merged_wave_info, but they don't have local_invocation_ids
|
||||
* in FAST_LAUNCH=1 mode (the default on GFX10.3, deprecated on GFX11).
|
||||
*/
|
||||
.lower_cs_local_id_to_index = true,
|
||||
/* Mesh shaders only have a 1D "vertex index" which we use
|
||||
* as "workgroup index" to emulate the 3D workgroup ID.
|
||||
*/
|
||||
.lower_workgroup_id_to_index = true,
|
||||
.shortcut_1d_workgroup_id = true,
|
||||
});
|
||||
}
|
||||
|
||||
/* nir_opt_clip_cull_const, si_nir_kill_outputs, and ac_nir_optimize_outputs require outputs
|
||||
* to be scalar.
|
||||
*/
|
||||
|
|
|
|||
|
|
@ -328,51 +328,6 @@ static void si_lower_nir(struct si_screen *sscreen, struct nir_shader *nir)
|
|||
NIR_PASS(_, nir, ac_nir_lower_mesh_inputs_to_mem);
|
||||
}
|
||||
|
||||
if (mesa_shader_stage_is_compute(nir->info.stage)) {
|
||||
/* gl_LocalInvocationIndex must be derived from gl_LocalInvocationID.xyz to make it correct
|
||||
* with quad derivatives. Using gl_SubgroupID for that (which is what we do by default) is
|
||||
* incorrect with a non-linear thread order.
|
||||
*
|
||||
* On Gfx12, we always use a non-linear thread order if the workgroup X and Y size is
|
||||
* divisible by 2.
|
||||
*/
|
||||
NIR_PASS(_, nir, nir_lower_compute_system_values,
|
||||
&(nir_lower_compute_system_values_options){
|
||||
.lower_local_invocation_index =
|
||||
nir->info.derivative_group == DERIVATIVE_GROUP_QUADS ||
|
||||
(sscreen->info.gfx_level >= GFX12 &&
|
||||
nir->info.derivative_group == DERIVATIVE_GROUP_NONE &&
|
||||
(nir->info.workgroup_size_variable ||
|
||||
(nir->info.workgroup_size[0] % 2 == 0 && nir->info.workgroup_size[1] % 2 == 0)))
|
||||
});
|
||||
|
||||
/* Gfx12 supports this in hw. */
|
||||
if (sscreen->info.gfx_level < GFX12 &&
|
||||
nir->info.derivative_group == DERIVATIVE_GROUP_QUADS) {
|
||||
NIR_PASS(_, nir, nir_opt_cse); /* CSE load_local_invocation_id */
|
||||
NIR_PASS(_, nir, nir_lower_compute_system_values,
|
||||
&(nir_lower_compute_system_values_options){
|
||||
.shuffle_local_ids_for_quad_derivatives = true,
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
if (nir->info.stage == MESA_SHADER_MESH && !sscreen->info.mesh_fast_launch_2) {
|
||||
NIR_PASS(_, nir, nir_lower_compute_system_values,
|
||||
&(nir_lower_compute_system_values_options){
|
||||
/* Mesh shaders run as NGG which can implement local_invocation_index from
|
||||
* the wave ID in merged_wave_info, but they don't have local_invocation_ids
|
||||
* in FAST_LAUNCH=1 mode (the default on GFX10.3, deprecated on GFX11).
|
||||
*/
|
||||
.lower_cs_local_id_to_index = true,
|
||||
/* Mesh shaders only have a 1D "vertex index" which we use
|
||||
* as "workgroup index" to emulate the 3D workgroup ID.
|
||||
*/
|
||||
.lower_workgroup_id_to_index = true,
|
||||
.shortcut_1d_workgroup_id = true,
|
||||
});
|
||||
}
|
||||
|
||||
si_nir_opts(sscreen, nir, true);
|
||||
/* Run late optimizations to fuse ffma and eliminate 16-bit conversions. */
|
||||
si_nir_late_opts(nir);
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue