diff --git a/src/amd/compiler/aco_scheduler.cpp b/src/amd/compiler/aco_scheduler.cpp index dfab79f5d85..896452e9d6c 100644 --- a/src/amd/compiler/aco_scheduler.cpp +++ b/src/amd/compiler/aco_scheduler.cpp @@ -1246,7 +1246,6 @@ schedule_program(Program* program) /* Allowing the scheduler to reduce the number of waves to as low as 5 * improves performance of Thrones of Britannia significantly and doesn't * seem to hurt anything else. */ - // TODO: account for possible uneven num_waves on GFX10+ unsigned wave_fac = program->dev.physical_vgprs / 256; if (program->num_waves <= 5 * wave_fac) ctx.num_waves = program->num_waves; @@ -1260,17 +1259,13 @@ schedule_program(Program* program) ctx.num_waves = std::min(ctx.num_waves, program->num_waves); ctx.num_waves = max_suitable_waves(program, ctx.num_waves); + assert(ctx.num_waves >= program->min_waves); + ctx.mv.max_registers = {int16_t(get_addr_vgpr_from_waves(program, ctx.num_waves) - 2), + int16_t(get_addr_sgpr_from_waves(program, ctx.num_waves))}; + /* VMEM_MAX_MOVES and such assume pre-GFX10 wave count */ ctx.num_waves = std::max(ctx.num_waves / wave_fac, 1); - assert(ctx.num_waves > 0); - ctx.mv.max_registers = { - int16_t(get_addr_vgpr_from_waves( - program, std::max(ctx.num_waves * wave_fac, program->min_waves)) - - 2), - int16_t(get_addr_sgpr_from_waves( - program, std::max(ctx.num_waves * wave_fac, program->min_waves)))}; - /* NGG culling shaders are very sensitive to position export scheduling. * Schedule less aggressively when early primitive export is used, and * keep the position export at the very bottom when late primitive export is used.