aco: remove occupancy check in dealloc_vgprs()

The occupancy check didn't consider that there might be different programs
using the same SIMD.

fossil-db (navi31):
Totals from 68129 (85.81% of 79395) affected shaders:
Instrs: 23230924 -> 23388315 (+0.68%)
CodeSize: 120636544 -> 121272888 (+0.53%)
Latency: 115645106 -> 115683965 (+0.03%)
InvThroughput: 18804076 -> 18806912 (+0.02%); split: -0.00%, +0.02%
Branches: 404644 -> 407945 (+0.82%)

Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/28633>
This commit is contained in:
Rhys Perry 2024-04-08 15:10:39 +01:00 committed by Marge Bot
parent 9775318aa9
commit c2a467dd4b
3 changed files with 24 additions and 13 deletions

View file

@ -1344,11 +1344,6 @@ dealloc_vgprs(Program* program)
if (program->gfx_level < GFX11)
return false;
/* skip if deallocating VGPRs won't increase occupancy */
uint16_t max_waves = max_suitable_waves(program, program->dev.max_waves_per_simd);
if (program->max_reg_demand.vgpr <= get_addr_vgpr_from_waves(program, max_waves))
return false;
/* sendmsg(dealloc_vgprs) releases scratch, so this isn't safe if there is an in-progress scratch
* store. */
if (uses_scratch(program))

View file

@ -180,11 +180,14 @@ BEGIN_TEST(isel.discard_early_exit.mrtz)
);
/* On GFX11, the discard early exit must use mrtz if the shader exports only depth. */
//>> exp mrtz v0, off, off, off done ; $_ $_
//! s_endpgm ; $_
//>> exp mrtz v0, off, off, off done ; $_ $_
//! s_nop 0 ; $_
//! s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; $_
//! s_endpgm ; $_
//! BB1:
//! exp mrtz off, off, off, off done ; $_ $_
//! s_endpgm ; $_
//! exp mrtz off, off, off, off done ; $_ $_
//! s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; $_
//! s_endpgm ; $_
PipelineBuilder pbld(get_vk_device(GFX11));
pbld.add_vsfs(vs, fs);
@ -205,11 +208,14 @@ BEGIN_TEST(isel.discard_early_exit.mrt0)
);
/* On GFX11, the discard early exit must use mrt0 if the shader exports color. */
//>> exp mrt0 v0, v0, v0, v0 done ; $_ $_
//! s_endpgm ; $_
//>> exp mrt0 v0, v0, v0, v0 done ; $_ $_
//! s_nop 0 ; $_
//! s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; $_
//! s_endpgm ; $_
//! BB1:
//! exp mrt0 off, off, off, off done ; $_ $_
//! s_endpgm ; $_
//! exp mrt0 off, off, off, off done ; $_ $_
//! s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; $_
//! s_endpgm ; $_
PipelineBuilder pbld(get_vk_device(GFX11));
pbld.add_vsfs(vs, fs);

View file

@ -355,6 +355,8 @@ BEGIN_TEST(to_hw_instr.swap_subdword)
bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_b1, v1b), Definition(v0_b3, v1b),
Operand(v0_b3, v1b), Operand(v0_b1, v1b));
//~gfx11! s_nop
//~gfx11! s_sendmsg sendmsg(dealloc_vgprs)
//~gfx(8|9|11)! s_endpgm
finish_to_hw_instr_test();
@ -516,6 +518,8 @@ BEGIN_TEST(to_hw_instr.subdword_constant)
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(17u));
bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_lo, v1b), Operand::zero(1));
//~gfx11! s_nop
//~gfx11! s_sendmsg sendmsg(dealloc_vgprs)
//! s_endpgm
finish_to_hw_instr_test();
@ -644,6 +648,8 @@ BEGIN_TEST(to_hw_instr.extract)
finish_to_hw_instr_test();
//~gfx11_.*! s_nop
//~gfx11_.*! s_sendmsg sendmsg(dealloc_vgprs)
//! s_endpgm
}
}
@ -736,6 +742,8 @@ BEGIN_TEST(to_hw_instr.insert)
finish_to_hw_instr_test();
//~gfx11! s_nop
//~gfx11! s_sendmsg sendmsg(dealloc_vgprs)
//! s_endpgm
}
END_TEST
@ -886,6 +894,8 @@ BEGIN_TEST(to_hw_instr.pack2x16_constant)
bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_lo, v2b), Definition(v0_hi, v2b),
Operand::zero(2), Operand(v1_lo, v2b));
//~gfx11! s_nop
//~gfx11! s_sendmsg sendmsg(dealloc_vgprs)
//! s_endpgm
finish_to_hw_instr_test();