mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-06-10 01:18:18 +02:00
ir3: Implement round-robin workaround
On later a6xx and a7xx, round-robin does not work properly when there are more than 8 active waves from the same dispatch in the same uSP. To guarantee that there are never more than 8 such waves, we have to enforce a minimum register usage, which limits occupancy. This does not solve the problem for very large workgroups (more than 8 waves per workgroup); that will have to be solved the same way as the problem with deep control flow, by implementing ReuseGPRMode. Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/41562>
This commit is contained in:
parent
25f7765d21
commit
31db17f653
2 changed files with 45 additions and 0 deletions
|
|
@ -298,6 +298,20 @@ ir3_get_reg_independent_max_waves(struct ir3_shader_variant *v,
|
|||
v->name);
|
||||
exit(1);
|
||||
}
|
||||
|
||||
/* Due to round_robin_errata we may be unable to support forward progress
|
||||
* guarantees between waves if there are more than 8 waves active.
|
||||
*/
|
||||
if (v->cs.round_robin_mode && compiler->info->props.round_robin_errata) {
|
||||
if (waves_per_wg > 8 && v->has_barrier) {
|
||||
mesa_loge(
|
||||
"Compute shader (%s) requires forward progress but uses more "
|
||||
"than 8 waves.",
|
||||
v->name);
|
||||
exit(1);
|
||||
}
|
||||
max_waves = MIN2(max_waves, 8);
|
||||
}
|
||||
}
|
||||
|
||||
return max_waves;
|
||||
|
|
@ -315,6 +329,29 @@ ir3_get_reg_dependent_max_waves(const struct ir3_compiler *compiler,
|
|||
: compiler->info->max_waves;
|
||||
}
|
||||
|
||||
/* Get the minimum number of registers a shader must declare, even if it doesn't
|
||||
* actually use as many.
|
||||
*/
|
||||
unsigned
|
||||
ir3_get_min_reg_count(const struct ir3_shader_variant *v, bool double_threadsize)
|
||||
{
|
||||
if (!ir3_shader_compute(v) || !v->cs.round_robin_mode ||
|
||||
!v->compiler->info->props.round_robin_errata)
|
||||
return 0;
|
||||
|
||||
/* Limit occupancy to work around the round-robin errata. */
|
||||
unsigned max_waves = 8;
|
||||
|
||||
/* We want to find the smallest register size where no more than
|
||||
* (max_waves / wave_granularity) waves fit in reg_size_vec4. Calculate the
|
||||
* maximum register size where (max_waves / wave_granularity + 1) waves fit,
|
||||
* then add 1.
|
||||
*/
|
||||
return (v->compiler->info->props.reg_size_vec4 /
|
||||
((max_waves / v->compiler->info->wave_granularity) *
|
||||
(double_threadsize ? 2 : 1) + 1)) + 1;
|
||||
}
|
||||
|
||||
void
|
||||
ir3_collect_info(struct ir3_shader_variant *v)
|
||||
{
|
||||
|
|
@ -555,6 +592,11 @@ ir3_collect_info(struct ir3_shader_variant *v)
|
|||
|
||||
info->double_threadsize = ir3_should_double_threadsize(v, regs_count);
|
||||
|
||||
/* Limit occupancy if necessary by increasing max_reg. */
|
||||
unsigned min_reg_count = ir3_get_min_reg_count(v, info->double_threadsize);
|
||||
if (min_reg_count > 0)
|
||||
info->max_reg = MAX2(info->max_reg, min_reg_count - 1);
|
||||
|
||||
/* TODO this is different for earlier gens, but earlier gens don't use this */
|
||||
info->subgroup_size = v->info.double_threadsize ? 128 : 64;
|
||||
|
||||
|
|
|
|||
|
|
@ -885,6 +885,9 @@ unsigned ir3_get_reg_dependent_max_waves(const struct ir3_compiler *compiler,
|
|||
unsigned ir3_get_reg_independent_max_waves(struct ir3_shader_variant *v,
|
||||
bool double_threadsize);
|
||||
|
||||
unsigned ir3_get_min_reg_count(const struct ir3_shader_variant *v,
|
||||
bool double_threadsize);
|
||||
|
||||
bool ir3_should_double_threadsize(struct ir3_shader_variant *v,
|
||||
unsigned regs_count);
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue