aco/waitcnt: fix DS/VMEM ordered writes when mixed

Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Georg Lehmann <dadschoorse@gmail.com>
Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
Cc: mesa-stable
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/28981>
(cherry picked from commit 5b1b09ad42)
This commit is contained in:
Rhys Perry 2024-04-24 16:57:10 +01:00 committed by Eric Engestrom
parent d2fbe79d37
commit ee40beb60d
3 changed files with 74 additions and 4 deletions

View file

@ -244,7 +244,7 @@
"description": "aco/waitcnt: fix DS/VMEM ordered writes when mixed",
"nominated": true,
"nomination_type": 0,
"resolution": 0,
"resolution": 1,
"main_sha": null,
"because_sha": null,
"notes": null

View file

@ -411,18 +411,20 @@ check_instr(wait_ctx& ctx, wait_imm& wait, alu_delay_info& delay, Instruction* i
if (it == ctx.gpr_map.end())
continue;
wait_imm reg_imm = it->second.imm;
/* Vector Memory reads and writes return in the order they were issued */
uint8_t vmem_type = get_vmem_type(instr);
if (vmem_type && ((it->second.events & vm_events) == event_vmem) &&
it->second.vmem_types == vmem_type)
continue;
reg_imm.vm = wait_imm::unset_counter;
/* LDS reads and writes return in the order they were issued. same for GDS */
if (instr->isDS() &&
(it->second.events & lgkm_events) == (instr->ds().gds ? event_gds : event_lds))
continue;
reg_imm.lgkm = wait_imm::unset_counter;
wait.combine(it->second.imm);
wait.combine(reg_imm);
}
}
}

View file

@ -111,3 +111,71 @@ BEGIN_TEST(insert_waitcnt.clause)
finish_waitcnt_test();
END_TEST
BEGIN_TEST(insert_waitcnt.waw.mixed_vmem_lds.vmem)
/* Write-after-write case where v[4] is written by a buffer load in BB0 and by
 * a DS read in BB1, and BB2 (which lists both as predecessors) overwrites v[4]
 * with another buffer load. The expected output requires an
 * "s_waitcnt lgkmcnt(0)" in front of that final load and no vmcnt wait.
 * NOTE(review): presumably the VMEM-after-VMEM write needs no wait because
 * those return in issue order, while the DS write reaching BB2 from BB1 still
 * has to be waited for -- confirm against check_instr().
 *
 * The comment lines beginning with ">>" or "!" after the two slashes appear to
 * be expected-output patterns read by the test framework; keep them verbatim
 * and in place.
 */
/* Bail out when the GFX10 compute-shader setup does not succeed. */
if (!setup_cs(NULL, GFX10))
return;
/* Fixed physical registers; they show up as v[4], v[0] and s[0-3] in the
 * expected output below.
 */
Definition def_v4(PhysReg(260), v1);
Operand op_v0(PhysReg(256), v1);
Operand desc0(PhysReg(0), s4);
//>> BB0
//! /* logical preds: / linear preds: / kind: top-level, */
//! v1: %0:v[4] = buffer_load_dword %0:s[0-3], %0:v[0], 0
/* BB0: buffer load that writes v[4]. */
bld.mubuf(aco_opcode::buffer_load_dword, def_v4, desc0, op_v0, Operand::zero(), 0, false);
//>> BB1
//! /* logical preds: / linear preds: / kind: */
//! v1: %0:v[4] = ds_read_b32 %0:v[0]
/* BB1: new block whose DS read writes the same register, v[4]. */
bld.reset(program->create_and_insert_block());
bld.ds(aco_opcode::ds_read_b32, def_v4, op_v0);
/* BB2: new block; BB0 and BB1 are registered by hand as both its linear and
 * its logical predecessors.
 */
bld.reset(program->create_and_insert_block());
program->blocks[2].linear_preds.push_back(0);
program->blocks[2].linear_preds.push_back(1);
program->blocks[2].logical_preds.push_back(0);
program->blocks[2].logical_preds.push_back(1);
//>> BB2
//! /* logical preds: BB0, BB1, / linear preds: BB0, BB1, / kind: uniform, */
//! s_waitcnt lgkmcnt(0)
//! v1: %0:v[4] = buffer_load_dword %0:s[0-3], %0:v[0], 0
/* Second buffer load into v[4]; the lgkmcnt(0) wait is expected before it. */
bld.mubuf(aco_opcode::buffer_load_dword, def_v4, desc0, op_v0, Operand::zero(), 0, false);
finish_waitcnt_test();
END_TEST
BEGIN_TEST(insert_waitcnt.waw.mixed_vmem_lds.lds)
/* Write-after-write case where v[4] is written by a buffer load in BB0 and by
 * a DS read in BB1, and BB2 (which lists both as predecessors) overwrites v[4]
 * with another DS read. The expected output requires an "s_waitcnt vmcnt(0)"
 * in front of that final DS read and no lgkmcnt wait.
 * NOTE(review): presumably the DS-after-DS write needs no wait because LDS
 * accesses return in issue order, while the buffer load reaching BB2 from BB0
 * still has to be waited for -- confirm against check_instr().
 *
 * The comment lines beginning with ">>" or "!" after the two slashes appear to
 * be expected-output patterns read by the test framework; keep them verbatim
 * and in place.
 */
/* Bail out when the GFX10 compute-shader setup does not succeed. */
if (!setup_cs(NULL, GFX10))
return;
/* Fixed physical registers; they show up as v[4], v[0] and s[0-3] in the
 * expected output below.
 */
Definition def_v4(PhysReg(260), v1);
Operand op_v0(PhysReg(256), v1);
Operand desc0(PhysReg(0), s4);
//>> BB0
//! /* logical preds: / linear preds: / kind: top-level, */
//! v1: %0:v[4] = buffer_load_dword %0:s[0-3], %0:v[0], 0
/* BB0: buffer load that writes v[4]. */
bld.mubuf(aco_opcode::buffer_load_dword, def_v4, desc0, op_v0, Operand::zero(), 0, false);
//>> BB1
//! /* logical preds: / linear preds: / kind: */
//! v1: %0:v[4] = ds_read_b32 %0:v[0]
/* BB1: new block whose DS read writes the same register, v[4]. */
bld.reset(program->create_and_insert_block());
bld.ds(aco_opcode::ds_read_b32, def_v4, op_v0);
/* BB2: new block; BB0 and BB1 are registered by hand as both its linear and
 * its logical predecessors.
 */
bld.reset(program->create_and_insert_block());
program->blocks[2].linear_preds.push_back(0);
program->blocks[2].linear_preds.push_back(1);
program->blocks[2].logical_preds.push_back(0);
program->blocks[2].logical_preds.push_back(1);
//>> BB2
//! /* logical preds: BB0, BB1, / linear preds: BB0, BB1, / kind: uniform, */
//! s_waitcnt vmcnt(0)
//! v1: %0:v[4] = ds_read_b32 %0:v[0]
/* Second DS read into v[4]; the vmcnt(0) wait is expected before it. */
bld.ds(aco_opcode::ds_read_b32, def_v4, op_v0);
finish_waitcnt_test();
END_TEST