mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-05 00:58:05 +02:00
aco: use null operand for SOPK s_waitcnt
Both null def and op result in the same correct encoding, but these instructions optionally read a sgpr, so it makes more sense to use an operand. Reviewed-by: Rhys Perry <pendingchaos02@gmail.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/26163>
This commit is contained in:
parent
bcf0425f7f
commit
e49c413a86
4 changed files with 24 additions and 25 deletions
|
|
@ -1033,7 +1033,7 @@ handle_instruction_gfx10(State& state, NOP_ctx_gfx10& ctx, aco_ptr<Instruction>&
|
|||
/* Reducing lgkmcnt count to 0 always mitigates the hazard. */
|
||||
const SOPP_instruction& sopp = instr->sopp();
|
||||
if (sopp.opcode == aco_opcode::s_waitcnt_lgkmcnt) {
|
||||
if (sopp.imm == 0 && sopp.definitions[0].physReg() == sgpr_null)
|
||||
if (sopp.imm == 0 && sopp.operands[0].physReg() == sgpr_null)
|
||||
ctx.sgprs_read_by_SMEM.reset();
|
||||
} else if (sopp.opcode == aco_opcode::s_waitcnt) {
|
||||
wait_imm imm(state.program->gfx_level, instr->sopp().imm);
|
||||
|
|
@ -1048,12 +1048,12 @@ handle_instruction_gfx10(State& state, NOP_ctx_gfx10& ctx, aco_ptr<Instruction>&
|
|||
*/
|
||||
if (instr->isVMEM() || instr->isGlobal() || instr->isScratch()) {
|
||||
if (ctx.has_branch_after_DS)
|
||||
bld.sopk(aco_opcode::s_waitcnt_vscnt, Definition(sgpr_null, s1), 0);
|
||||
bld.sopk(aco_opcode::s_waitcnt_vscnt, Operand(sgpr_null, s1), 0);
|
||||
ctx.has_branch_after_VMEM = ctx.has_branch_after_DS = ctx.has_DS = false;
|
||||
ctx.has_VMEM = true;
|
||||
} else if (instr->isDS()) {
|
||||
if (ctx.has_branch_after_VMEM)
|
||||
bld.sopk(aco_opcode::s_waitcnt_vscnt, Definition(sgpr_null, s1), 0);
|
||||
bld.sopk(aco_opcode::s_waitcnt_vscnt, Operand(sgpr_null, s1), 0);
|
||||
ctx.has_branch_after_VMEM = ctx.has_branch_after_DS = ctx.has_VMEM = false;
|
||||
ctx.has_DS = true;
|
||||
} else if (instr_is_branch(instr)) {
|
||||
|
|
@ -1063,7 +1063,7 @@ handle_instruction_gfx10(State& state, NOP_ctx_gfx10& ctx, aco_ptr<Instruction>&
|
|||
} else if (instr->opcode == aco_opcode::s_waitcnt_vscnt) {
|
||||
/* Only s_waitcnt_vscnt can mitigate the hazard */
|
||||
const SOPK_instruction& sopk = instr->sopk();
|
||||
if (sopk.definitions[0].physReg() == sgpr_null && sopk.imm == 0)
|
||||
if (sopk.operands[0].physReg() == sgpr_null && sopk.imm == 0)
|
||||
ctx.has_VMEM = ctx.has_branch_after_VMEM = ctx.has_DS = ctx.has_branch_after_DS = false;
|
||||
}
|
||||
|
||||
|
|
@ -1142,7 +1142,7 @@ resolve_all_gfx10(State& state, NOP_ctx_gfx10& ctx,
|
|||
|
||||
/* LdsBranchVmemWARHazard */
|
||||
if (ctx.has_VMEM || ctx.has_branch_after_VMEM || ctx.has_DS || ctx.has_branch_after_DS) {
|
||||
bld.sopk(aco_opcode::s_waitcnt_vscnt, Definition(sgpr_null, s1), 0);
|
||||
bld.sopk(aco_opcode::s_waitcnt_vscnt, Operand(sgpr_null, s1), 0);
|
||||
ctx.has_VMEM = ctx.has_branch_after_VMEM = ctx.has_DS = ctx.has_branch_after_DS = false;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -447,8 +447,7 @@ check_instr(wait_ctx& ctx, wait_imm& wait, alu_delay_info& delay, Instruction* i
|
|||
bool
|
||||
parse_wait_instr(wait_ctx& ctx, wait_imm& imm, Instruction* instr)
|
||||
{
|
||||
if (instr->opcode == aco_opcode::s_waitcnt_vscnt &&
|
||||
instr->definitions[0].physReg() == sgpr_null) {
|
||||
if (instr->opcode == aco_opcode::s_waitcnt_vscnt && instr->operands[0].physReg() == sgpr_null) {
|
||||
imm.vs = std::min<uint8_t>(imm.vs, instr->sopk().imm);
|
||||
return true;
|
||||
} else if (instr->opcode == aco_opcode::s_waitcnt) {
|
||||
|
|
@ -995,8 +994,8 @@ emit_waitcnt(wait_ctx& ctx, std::vector<aco_ptr<Instruction>>& instructions, wai
|
|||
if (imm.vs != wait_imm::unset_counter) {
|
||||
assert(ctx.gfx_level >= GFX10);
|
||||
SOPK_instruction* waitcnt_vs =
|
||||
create_instruction<SOPK_instruction>(aco_opcode::s_waitcnt_vscnt, Format::SOPK, 0, 1);
|
||||
waitcnt_vs->definitions[0] = Definition(sgpr_null, s1);
|
||||
create_instruction<SOPK_instruction>(aco_opcode::s_waitcnt_vscnt, Format::SOPK, 1, 0);
|
||||
waitcnt_vs->operands[0] = Operand(sgpr_null, s1);
|
||||
waitcnt_vs->imm = imm.vs;
|
||||
instructions.emplace_back(waitcnt_vs);
|
||||
imm.vs = wait_imm::unset_counter;
|
||||
|
|
|
|||
|
|
@ -2472,7 +2472,7 @@ lower_to_hw_instr(Program* program)
|
|||
* waitcnt insertion doesn't work in a discard early exit block.
|
||||
*/
|
||||
if (program->gfx_level >= GFX10)
|
||||
bld.sopk(aco_opcode::s_waitcnt_vscnt, Definition(sgpr_null, s1), 0);
|
||||
bld.sopk(aco_opcode::s_waitcnt_vscnt, Operand(sgpr_null, s1), 0);
|
||||
wait_imm pops_exit_wait_imm;
|
||||
pops_exit_wait_imm.vm = 0;
|
||||
if (program->has_smem_buffer_or_global_loads)
|
||||
|
|
|
|||
|
|
@ -264,11 +264,11 @@ BEGIN_TEST(insert_nops.vmem_to_scalar_write)
|
|||
|
||||
//! p_unit_test 9
|
||||
//! buffer_store_dword %0:s[0-3], %0:v[0], 0, %0:v[0] offen
|
||||
//! s1: %0:null = s_waitcnt_vscnt imm:0
|
||||
//! s_waitcnt_vscnt %0:null imm:0
|
||||
//! s1: %0:s[0] = s_mov_b32 0
|
||||
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(9));
|
||||
create_mubuf_store();
|
||||
bld.sopk(aco_opcode::s_waitcnt_vscnt, Definition(sgpr_null, s1), 0);
|
||||
bld.sopk(aco_opcode::s_waitcnt_vscnt, Operand(sgpr_null, s1), 0);
|
||||
bld.sop1(aco_opcode::s_mov_b32, Definition(PhysReg(0), s1), Operand::zero());
|
||||
|
||||
//! p_unit_test 10
|
||||
|
|
@ -284,12 +284,12 @@ BEGIN_TEST(insert_nops.vmem_to_scalar_write)
|
|||
/* VMEM/LDS with the wrong waitcnt in-between */
|
||||
//! p_unit_test 11
|
||||
//! v1: %0:v[0] = buffer_load_dword %0:s[0-3], %0:v[0], 0 offen
|
||||
//! s1: %0:null = s_waitcnt_vscnt imm:0
|
||||
//! s_waitcnt_vscnt %0:null imm:0
|
||||
//! s_waitcnt_depctr vm_vsrc(0)
|
||||
//! s1: %0:s[0] = s_mov_b32 0
|
||||
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(11));
|
||||
create_mubuf(0);
|
||||
bld.sopk(aco_opcode::s_waitcnt_vscnt, Definition(sgpr_null, s1), 0);
|
||||
bld.sopk(aco_opcode::s_waitcnt_vscnt, Operand(sgpr_null, s1), 0);
|
||||
bld.sop1(aco_opcode::s_mov_b32, Definition(PhysReg(0), s1), Operand::zero());
|
||||
|
||||
//! p_unit_test 12
|
||||
|
|
@ -530,11 +530,11 @@ BEGIN_TEST(insert_nops.lds_direct_vmem)
|
|||
|
||||
//! p_unit_test 9
|
||||
//! buffer_store_dword %0:s[0-3], %0:v[0], 0, %0:v[0] offen
|
||||
//! s1: %0:null = s_waitcnt_vscnt imm:0
|
||||
//! s_waitcnt_vscnt %0:null imm:0
|
||||
//! v1: %0:v[0] = lds_direct_load %0:m0
|
||||
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(9));
|
||||
create_mubuf_store();
|
||||
bld.sopk(aco_opcode::s_waitcnt_vscnt, Definition(sgpr_null, s1), 0);
|
||||
bld.sopk(aco_opcode::s_waitcnt_vscnt, Operand(sgpr_null, s1), 0);
|
||||
bld.ldsdir(aco_opcode::lds_direct_load, Definition(PhysReg(256), v1), Operand(m0, s1));
|
||||
|
||||
//! p_unit_test 10
|
||||
|
|
@ -549,12 +549,12 @@ BEGIN_TEST(insert_nops.lds_direct_vmem)
|
|||
/* VMEM/LDS with the wrong waitcnt in-between */
|
||||
//! p_unit_test 11
|
||||
//! v1: %0:v[1] = buffer_load_dword %0:s[0-3], %0:v[0], 0 offen
|
||||
//! s1: %0:null = s_waitcnt_vscnt imm:0
|
||||
//! s_waitcnt_vscnt %0:null imm:0
|
||||
//! s_waitcnt_depctr vm_vsrc(0)
|
||||
//! v1: %0:v[0] = lds_direct_load %0:m0
|
||||
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(11));
|
||||
create_mubuf(0, PhysReg(257));
|
||||
bld.sopk(aco_opcode::s_waitcnt_vscnt, Definition(sgpr_null, s1), 0);
|
||||
bld.sopk(aco_opcode::s_waitcnt_vscnt, Operand(sgpr_null, s1), 0);
|
||||
bld.ldsdir(aco_opcode::lds_direct_load, Definition(PhysReg(256), v1), Operand(m0, s1));
|
||||
|
||||
//! p_unit_test 12
|
||||
|
|
@ -579,12 +579,12 @@ BEGIN_TEST(insert_nops.lds_direct_vmem)
|
|||
|
||||
//! p_unit_test 14
|
||||
//! v1: %0:v[0] = buffer_load_dword %0:s[0-3], %0:v[1], 0 offen
|
||||
//! s1: %0:null = s_waitcnt_vscnt imm:0
|
||||
//! s_waitcnt_vscnt %0:null imm:0
|
||||
//! s_waitcnt_depctr vm_vsrc(0)
|
||||
//! v1: %0:v[0] = lds_direct_load %0:m0
|
||||
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(14));
|
||||
create_mubuf(0, PhysReg(256), PhysReg(257));
|
||||
bld.sopk(aco_opcode::s_waitcnt_vscnt, Definition(sgpr_null, s1), 0);
|
||||
bld.sopk(aco_opcode::s_waitcnt_vscnt, Operand(sgpr_null, s1), 0);
|
||||
bld.ldsdir(aco_opcode::lds_direct_load, Definition(PhysReg(256), v1), Operand(m0, s1));
|
||||
|
||||
finish_insert_nops_test();
|
||||
|
|
@ -1165,12 +1165,12 @@ BEGIN_TEST(insert_nops.setpc_gfx10)
|
|||
/* VMEMtoScalarWriteHazard */
|
||||
//! p_unit_test 2
|
||||
//! v1: %0:v[0] = ds_read_b32 %0:v[0]
|
||||
//! s1: %0:null = s_waitcnt_vscnt imm:0
|
||||
//! s_waitcnt_vscnt %0:null imm:0
|
||||
//! s_waitcnt_depctr vm_vsrc(0)
|
||||
//! s_setpc_b64 0
|
||||
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(2));
|
||||
bld.ds(aco_opcode::ds_read_b32, Definition(PhysReg(256), v1), Operand(PhysReg(256), v1));
|
||||
bld.sopk(aco_opcode::s_waitcnt_vscnt, Definition(sgpr_null, s1),
|
||||
bld.sopk(aco_opcode::s_waitcnt_vscnt, Operand(sgpr_null, s1),
|
||||
0); /* reset LdsBranchVmemWARHazard */
|
||||
bld.sop1(aco_opcode::s_setpc_b64, Operand::zero(8));
|
||||
|
||||
|
|
@ -1188,7 +1188,7 @@ BEGIN_TEST(insert_nops.setpc_gfx10)
|
|||
//! v1: %0:v[0] = ds_read_b32 %0:v[0]
|
||||
//! v_nop
|
||||
//! s_branch
|
||||
//! s1: %0:null = s_waitcnt_vscnt imm:0
|
||||
//! s_waitcnt_vscnt %0:null imm:0
|
||||
//! s_setpc_b64 0
|
||||
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(4));
|
||||
bld.ds(aco_opcode::ds_read_b32, Definition(PhysReg(256), v1), Operand(PhysReg(256), v1));
|
||||
|
|
@ -1199,7 +1199,7 @@ BEGIN_TEST(insert_nops.setpc_gfx10)
|
|||
//! p_unit_test 5
|
||||
//! v1: %0:v[0] = ds_read_b32 %0:v[0]
|
||||
//! v_nop
|
||||
//! s1: %0:null = s_waitcnt_vscnt imm:0
|
||||
//! s_waitcnt_vscnt %0:null imm:0
|
||||
//! s_setpc_b64 0
|
||||
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(5));
|
||||
bld.ds(aco_opcode::ds_read_b32, Definition(PhysReg(256), v1), Operand(PhysReg(256), v1));
|
||||
|
|
@ -1234,7 +1234,7 @@ BEGIN_TEST(insert_nops.setpc_gfx10)
|
|||
//>> p_unit_test 8
|
||||
//! v1: %0:v[0] = image_sample %0:s[0-7], %0:s[0-3], v1: undef, %0:v[0], %0:v[2], %0:v[4], %0:v[6], %0:v[8], %0:v[10] 2d
|
||||
//! s_waitcnt_depctr vm_vsrc(0)
|
||||
//! s1: %0:null = s_waitcnt_vscnt imm:0
|
||||
//! s_waitcnt_vscnt %0:null imm:0
|
||||
create_program(GFX10, compute_cs, 64, CHIP_UNKNOWN);
|
||||
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(8));
|
||||
create_mimg(true, 6, 4);
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue