aco: use null operand for SOPK s_waitcnt

Both a null definition and a null operand result in the same, correct encoding,
but these instructions optionally read an SGPR, so it makes more sense to use an operand.

Reviewed-by: Rhys Perry <pendingchaos02@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/26163>
This commit is contained in:
Georg Lehmann 2023-11-11 10:46:13 +01:00 committed by Marge Bot
parent bcf0425f7f
commit e49c413a86
4 changed files with 24 additions and 25 deletions

View file

@ -1033,7 +1033,7 @@ handle_instruction_gfx10(State& state, NOP_ctx_gfx10& ctx, aco_ptr<Instruction>&
/* Reducing lgkmcnt count to 0 always mitigates the hazard. */
const SOPP_instruction& sopp = instr->sopp();
if (sopp.opcode == aco_opcode::s_waitcnt_lgkmcnt) {
if (sopp.imm == 0 && sopp.definitions[0].physReg() == sgpr_null)
if (sopp.imm == 0 && sopp.operands[0].physReg() == sgpr_null)
ctx.sgprs_read_by_SMEM.reset();
} else if (sopp.opcode == aco_opcode::s_waitcnt) {
wait_imm imm(state.program->gfx_level, instr->sopp().imm);
@ -1048,12 +1048,12 @@ handle_instruction_gfx10(State& state, NOP_ctx_gfx10& ctx, aco_ptr<Instruction>&
*/
if (instr->isVMEM() || instr->isGlobal() || instr->isScratch()) {
if (ctx.has_branch_after_DS)
bld.sopk(aco_opcode::s_waitcnt_vscnt, Definition(sgpr_null, s1), 0);
bld.sopk(aco_opcode::s_waitcnt_vscnt, Operand(sgpr_null, s1), 0);
ctx.has_branch_after_VMEM = ctx.has_branch_after_DS = ctx.has_DS = false;
ctx.has_VMEM = true;
} else if (instr->isDS()) {
if (ctx.has_branch_after_VMEM)
bld.sopk(aco_opcode::s_waitcnt_vscnt, Definition(sgpr_null, s1), 0);
bld.sopk(aco_opcode::s_waitcnt_vscnt, Operand(sgpr_null, s1), 0);
ctx.has_branch_after_VMEM = ctx.has_branch_after_DS = ctx.has_VMEM = false;
ctx.has_DS = true;
} else if (instr_is_branch(instr)) {
@ -1063,7 +1063,7 @@ handle_instruction_gfx10(State& state, NOP_ctx_gfx10& ctx, aco_ptr<Instruction>&
} else if (instr->opcode == aco_opcode::s_waitcnt_vscnt) {
/* Only s_waitcnt_vscnt can mitigate the hazard */
const SOPK_instruction& sopk = instr->sopk();
if (sopk.definitions[0].physReg() == sgpr_null && sopk.imm == 0)
if (sopk.operands[0].physReg() == sgpr_null && sopk.imm == 0)
ctx.has_VMEM = ctx.has_branch_after_VMEM = ctx.has_DS = ctx.has_branch_after_DS = false;
}
@ -1142,7 +1142,7 @@ resolve_all_gfx10(State& state, NOP_ctx_gfx10& ctx,
/* LdsBranchVmemWARHazard */
if (ctx.has_VMEM || ctx.has_branch_after_VMEM || ctx.has_DS || ctx.has_branch_after_DS) {
bld.sopk(aco_opcode::s_waitcnt_vscnt, Definition(sgpr_null, s1), 0);
bld.sopk(aco_opcode::s_waitcnt_vscnt, Operand(sgpr_null, s1), 0);
ctx.has_VMEM = ctx.has_branch_after_VMEM = ctx.has_DS = ctx.has_branch_after_DS = false;
}

View file

@ -447,8 +447,7 @@ check_instr(wait_ctx& ctx, wait_imm& wait, alu_delay_info& delay, Instruction* i
bool
parse_wait_instr(wait_ctx& ctx, wait_imm& imm, Instruction* instr)
{
if (instr->opcode == aco_opcode::s_waitcnt_vscnt &&
instr->definitions[0].physReg() == sgpr_null) {
if (instr->opcode == aco_opcode::s_waitcnt_vscnt && instr->operands[0].physReg() == sgpr_null) {
imm.vs = std::min<uint8_t>(imm.vs, instr->sopk().imm);
return true;
} else if (instr->opcode == aco_opcode::s_waitcnt) {
@ -995,8 +994,8 @@ emit_waitcnt(wait_ctx& ctx, std::vector<aco_ptr<Instruction>>& instructions, wai
if (imm.vs != wait_imm::unset_counter) {
assert(ctx.gfx_level >= GFX10);
SOPK_instruction* waitcnt_vs =
create_instruction<SOPK_instruction>(aco_opcode::s_waitcnt_vscnt, Format::SOPK, 0, 1);
waitcnt_vs->definitions[0] = Definition(sgpr_null, s1);
create_instruction<SOPK_instruction>(aco_opcode::s_waitcnt_vscnt, Format::SOPK, 1, 0);
waitcnt_vs->operands[0] = Operand(sgpr_null, s1);
waitcnt_vs->imm = imm.vs;
instructions.emplace_back(waitcnt_vs);
imm.vs = wait_imm::unset_counter;

View file

@ -2472,7 +2472,7 @@ lower_to_hw_instr(Program* program)
* waitcnt insertion doesn't work in a discard early exit block.
*/
if (program->gfx_level >= GFX10)
bld.sopk(aco_opcode::s_waitcnt_vscnt, Definition(sgpr_null, s1), 0);
bld.sopk(aco_opcode::s_waitcnt_vscnt, Operand(sgpr_null, s1), 0);
wait_imm pops_exit_wait_imm;
pops_exit_wait_imm.vm = 0;
if (program->has_smem_buffer_or_global_loads)

View file

@ -264,11 +264,11 @@ BEGIN_TEST(insert_nops.vmem_to_scalar_write)
//! p_unit_test 9
//! buffer_store_dword %0:s[0-3], %0:v[0], 0, %0:v[0] offen
//! s1: %0:null = s_waitcnt_vscnt imm:0
//! s_waitcnt_vscnt %0:null imm:0
//! s1: %0:s[0] = s_mov_b32 0
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(9));
create_mubuf_store();
bld.sopk(aco_opcode::s_waitcnt_vscnt, Definition(sgpr_null, s1), 0);
bld.sopk(aco_opcode::s_waitcnt_vscnt, Operand(sgpr_null, s1), 0);
bld.sop1(aco_opcode::s_mov_b32, Definition(PhysReg(0), s1), Operand::zero());
//! p_unit_test 10
@ -284,12 +284,12 @@ BEGIN_TEST(insert_nops.vmem_to_scalar_write)
/* VMEM/LDS with the wrong waitcnt in-between */
//! p_unit_test 11
//! v1: %0:v[0] = buffer_load_dword %0:s[0-3], %0:v[0], 0 offen
//! s1: %0:null = s_waitcnt_vscnt imm:0
//! s_waitcnt_vscnt %0:null imm:0
//! s_waitcnt_depctr vm_vsrc(0)
//! s1: %0:s[0] = s_mov_b32 0
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(11));
create_mubuf(0);
bld.sopk(aco_opcode::s_waitcnt_vscnt, Definition(sgpr_null, s1), 0);
bld.sopk(aco_opcode::s_waitcnt_vscnt, Operand(sgpr_null, s1), 0);
bld.sop1(aco_opcode::s_mov_b32, Definition(PhysReg(0), s1), Operand::zero());
//! p_unit_test 12
@ -530,11 +530,11 @@ BEGIN_TEST(insert_nops.lds_direct_vmem)
//! p_unit_test 9
//! buffer_store_dword %0:s[0-3], %0:v[0], 0, %0:v[0] offen
//! s1: %0:null = s_waitcnt_vscnt imm:0
//! s_waitcnt_vscnt %0:null imm:0
//! v1: %0:v[0] = lds_direct_load %0:m0
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(9));
create_mubuf_store();
bld.sopk(aco_opcode::s_waitcnt_vscnt, Definition(sgpr_null, s1), 0);
bld.sopk(aco_opcode::s_waitcnt_vscnt, Operand(sgpr_null, s1), 0);
bld.ldsdir(aco_opcode::lds_direct_load, Definition(PhysReg(256), v1), Operand(m0, s1));
//! p_unit_test 10
@ -549,12 +549,12 @@ BEGIN_TEST(insert_nops.lds_direct_vmem)
/* VMEM/LDS with the wrong waitcnt in-between */
//! p_unit_test 11
//! v1: %0:v[1] = buffer_load_dword %0:s[0-3], %0:v[0], 0 offen
//! s1: %0:null = s_waitcnt_vscnt imm:0
//! s_waitcnt_vscnt %0:null imm:0
//! s_waitcnt_depctr vm_vsrc(0)
//! v1: %0:v[0] = lds_direct_load %0:m0
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(11));
create_mubuf(0, PhysReg(257));
bld.sopk(aco_opcode::s_waitcnt_vscnt, Definition(sgpr_null, s1), 0);
bld.sopk(aco_opcode::s_waitcnt_vscnt, Operand(sgpr_null, s1), 0);
bld.ldsdir(aco_opcode::lds_direct_load, Definition(PhysReg(256), v1), Operand(m0, s1));
//! p_unit_test 12
@ -579,12 +579,12 @@ BEGIN_TEST(insert_nops.lds_direct_vmem)
//! p_unit_test 14
//! v1: %0:v[0] = buffer_load_dword %0:s[0-3], %0:v[1], 0 offen
//! s1: %0:null = s_waitcnt_vscnt imm:0
//! s_waitcnt_vscnt %0:null imm:0
//! s_waitcnt_depctr vm_vsrc(0)
//! v1: %0:v[0] = lds_direct_load %0:m0
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(14));
create_mubuf(0, PhysReg(256), PhysReg(257));
bld.sopk(aco_opcode::s_waitcnt_vscnt, Definition(sgpr_null, s1), 0);
bld.sopk(aco_opcode::s_waitcnt_vscnt, Operand(sgpr_null, s1), 0);
bld.ldsdir(aco_opcode::lds_direct_load, Definition(PhysReg(256), v1), Operand(m0, s1));
finish_insert_nops_test();
@ -1165,12 +1165,12 @@ BEGIN_TEST(insert_nops.setpc_gfx10)
/* VMEMtoScalarWriteHazard */
//! p_unit_test 2
//! v1: %0:v[0] = ds_read_b32 %0:v[0]
//! s1: %0:null = s_waitcnt_vscnt imm:0
//! s_waitcnt_vscnt %0:null imm:0
//! s_waitcnt_depctr vm_vsrc(0)
//! s_setpc_b64 0
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(2));
bld.ds(aco_opcode::ds_read_b32, Definition(PhysReg(256), v1), Operand(PhysReg(256), v1));
bld.sopk(aco_opcode::s_waitcnt_vscnt, Definition(sgpr_null, s1),
bld.sopk(aco_opcode::s_waitcnt_vscnt, Operand(sgpr_null, s1),
0); /* reset LdsBranchVmemWARHazard */
bld.sop1(aco_opcode::s_setpc_b64, Operand::zero(8));
@ -1188,7 +1188,7 @@ BEGIN_TEST(insert_nops.setpc_gfx10)
//! v1: %0:v[0] = ds_read_b32 %0:v[0]
//! v_nop
//! s_branch
//! s1: %0:null = s_waitcnt_vscnt imm:0
//! s_waitcnt_vscnt %0:null imm:0
//! s_setpc_b64 0
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(4));
bld.ds(aco_opcode::ds_read_b32, Definition(PhysReg(256), v1), Operand(PhysReg(256), v1));
@ -1199,7 +1199,7 @@ BEGIN_TEST(insert_nops.setpc_gfx10)
//! p_unit_test 5
//! v1: %0:v[0] = ds_read_b32 %0:v[0]
//! v_nop
//! s1: %0:null = s_waitcnt_vscnt imm:0
//! s_waitcnt_vscnt %0:null imm:0
//! s_setpc_b64 0
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(5));
bld.ds(aco_opcode::ds_read_b32, Definition(PhysReg(256), v1), Operand(PhysReg(256), v1));
@ -1234,7 +1234,7 @@ BEGIN_TEST(insert_nops.setpc_gfx10)
//>> p_unit_test 8
//! v1: %0:v[0] = image_sample %0:s[0-7], %0:s[0-3], v1: undef, %0:v[0], %0:v[2], %0:v[4], %0:v[6], %0:v[8], %0:v[10] 2d
//! s_waitcnt_depctr vm_vsrc(0)
//! s1: %0:null = s_waitcnt_vscnt imm:0
//! s_waitcnt_vscnt %0:null imm:0
create_program(GFX10, compute_cs, 64, CHIP_UNKNOWN);
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(8));
create_mimg(true, 6, 4);