diff --git a/src/amd/compiler/aco_ir.cpp b/src/amd/compiler/aco_ir.cpp index 8d6760695a5..07eae73ef56 100644 --- a/src/amd/compiler/aco_ir.cpp +++ b/src/amd/compiler/aco_ir.cpp @@ -1344,11 +1344,6 @@ dealloc_vgprs(Program* program) if (program->gfx_level < GFX11) return false; - /* skip if deallocating VGPRs won't increase occupancy */ - uint16_t max_waves = max_suitable_waves(program, program->dev.max_waves_per_simd); - if (program->max_reg_demand.vgpr <= get_addr_vgpr_from_waves(program, max_waves)) - return false; - /* sendmsg(dealloc_vgprs) releases scratch, so this isn't safe if there is a in-progress scratch * store. */ if (uses_scratch(program)) diff --git a/src/amd/compiler/tests/test_isel.cpp b/src/amd/compiler/tests/test_isel.cpp index bfc758fb387..96c7d8040fa 100644 --- a/src/amd/compiler/tests/test_isel.cpp +++ b/src/amd/compiler/tests/test_isel.cpp @@ -180,11 +180,14 @@ BEGIN_TEST(isel.discard_early_exit.mrtz) ); /* On GFX11, the discard early exit must use mrtz if the shader exports only depth. */ - //>> exp mrtz v0, off, off, off done ; $_ $_ - //! s_endpgm ; $_ + //>> exp mrtz v0, off, off, off done ; $_ $_ + //! s_nop 0 ; $_ + //! s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; $_ + //! s_endpgm ; $_ //! BB1: - //! exp mrtz off, off, off, off done ; $_ $_ - //! s_endpgm ; $_ + //! exp mrtz off, off, off, off done ; $_ $_ + //! s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; $_ + //! s_endpgm ; $_ PipelineBuilder pbld(get_vk_device(GFX11)); pbld.add_vsfs(vs, fs); @@ -205,11 +208,14 @@ BEGIN_TEST(isel.discard_early_exit.mrt0) ); /* On GFX11, the discard early exit must use mrt0 if the shader exports color. */ - //>> exp mrt0 v0, v0, v0, v0 done ; $_ $_ - //! s_endpgm ; $_ + //>> exp mrt0 v0, v0, v0, v0 done ; $_ $_ + //! s_nop 0 ; $_ + //! s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; $_ + //! s_endpgm ; $_ //! BB1: - //! exp mrt0 off, off, off, off done ; $_ $_ - //! s_endpgm ; $_ + //! exp mrt0 off, off, off, off done ; $_ $_ + //! s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; $_ + //! s_endpgm ; $_ PipelineBuilder pbld(get_vk_device(GFX11)); pbld.add_vsfs(vs, fs); diff --git a/src/amd/compiler/tests/test_to_hw_instr.cpp b/src/amd/compiler/tests/test_to_hw_instr.cpp index 14ddfcd4d1f..81667082c25 100644 --- a/src/amd/compiler/tests/test_to_hw_instr.cpp +++ b/src/amd/compiler/tests/test_to_hw_instr.cpp @@ -355,6 +355,8 @@ BEGIN_TEST(to_hw_instr.swap_subdword) bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_b1, v1b), Definition(v0_b3, v1b), Operand(v0_b3, v1b), Operand(v0_b1, v1b)); + //~gfx11! s_nop + //~gfx11! s_sendmsg sendmsg(dealloc_vgprs) //~gfx(8|9|11)! s_endpgm finish_to_hw_instr_test(); @@ -516,6 +518,8 @@ BEGIN_TEST(to_hw_instr.subdword_constant) bld.pseudo(aco_opcode::p_unit_test, Operand::c32(17u)); bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_lo, v1b), Operand::zero(1)); + //~gfx11! s_nop + //~gfx11! s_sendmsg sendmsg(dealloc_vgprs) //! s_endpgm finish_to_hw_instr_test(); @@ -644,6 +648,8 @@ BEGIN_TEST(to_hw_instr.extract) finish_to_hw_instr_test(); + //~gfx11_.*! s_nop + //~gfx11_.*! s_sendmsg sendmsg(dealloc_vgprs) //! s_endpgm } } @@ -736,6 +742,8 @@ BEGIN_TEST(to_hw_instr.insert) finish_to_hw_instr_test(); + //~gfx11! s_nop + //~gfx11! s_sendmsg sendmsg(dealloc_vgprs) //! s_endpgm } END_TEST @@ -886,6 +894,8 @@ BEGIN_TEST(to_hw_instr.pack2x16_constant) bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_lo, v2b), Definition(v0_hi, v2b), Operand::zero(2), Operand(v1_lo, v2b)); + //~gfx11! s_nop + //~gfx11! s_sendmsg sendmsg(dealloc_vgprs) //! s_endpgm finish_to_hw_instr_test();