aco: don't create sendmsg(dealloc_vgprs) if scratch is used

LLVM does something similar: https://reviews.llvm.org/D153295

fossil-db (gfx1100):
Totals from 21 (0.02% of 133461) affected shaders:
Instrs: 147428 -> 147396 (-0.02%)
CodeSize: 797188 -> 797060 (-0.02%)

Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
Fixes: 2930317cea ("aco/gfx11: deallocate VGPRs at the end of the shader")
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/24669>
(cherry picked from commit c9b177db0e)
This commit is contained in:
Rhys Perry 2023-08-11 20:58:32 +01:00 committed by Eric Engestrom
parent 7c1b0cb73e
commit 0831a42465
4 changed files with 17 additions and 7 deletions

View file

@ -598,7 +598,7 @@
"description": "aco: don't create sendmsg(dealloc_vgprs) if scratch is used",
"nominated": true,
"nomination_type": 1,
"resolution": 0,
"resolution": 1,
"main_sha": null,
"because_sha": "2930317cea53843b4f3f2b25f11fba5ba82fda16"
},

View file

@ -1082,6 +1082,11 @@ dealloc_vgprs(Program* program)
if (program->max_reg_demand.vgpr <= get_addr_vgpr_from_waves(program, max_waves))
return false;
/* sendmsg(dealloc_vgprs) releases scratch, so this isn't safe if there is an in-progress
* scratch store. */
if (uses_scratch(program))
return false;
Block& block = program->blocks.back();
/* don't bother checking if there is a pending VMEM store or export: there almost always is */

View file

@ -2298,6 +2298,8 @@ uint16_t get_vgpr_alloc(Program* program, uint16_t addressable_vgprs);
uint16_t get_addr_sgpr_from_waves(Program* program, uint16_t max_waves);
uint16_t get_addr_vgpr_from_waves(Program* program, uint16_t max_waves);
bool uses_scratch(Program* program);
typedef struct {
const int16_t opcode_gfx7[static_cast<int>(aco_opcode::num_opcodes)];
const int16_t opcode_gfx9[static_cast<int>(aco_opcode::num_opcodes)];

View file

@ -298,15 +298,18 @@ calc_waves_per_workgroup(Program* program)
}
} /* end namespace */
/* Reports whether the program has to be treated as a scratch user: either a
 * non-zero per-wave scratch size is already recorded in its config, or it is a
 * ray tracing compute shader.
 */
bool
uses_scratch(Program* program)
{
if (program->config->scratch_bytes_per_wave != 0)
return true;

/* RT does use scratch, but the amount isn't known yet, so the recorded
 * size can't be relied on for that stage. */
return program->stage == raytracing_cs;
}
uint16_t
get_extra_sgprs(Program* program)
{
/* We don't use this register on GFX6-8 and it's removed on GFX10+. RT uses scratch but we don't
* yet know how much.
*/
bool needs_flat_scr =
(program->config->scratch_bytes_per_wave || program->stage == raytracing_cs) &&
program->gfx_level == GFX9;
/* We don't use this register on GFX6-8 and it's removed on GFX10+. */
bool needs_flat_scr = uses_scratch(program) && program->gfx_level == GFX9;
if (program->gfx_level >= GFX10) {
assert(!program->dev.xnack_enabled);