aco/ir: add function to parse depctr waits

No Foz-DB changes on Navi31.

Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
Reviewed-by: Rhys Perry <pendingchaos02@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/31132>
This commit is contained in:
Georg Lehmann 2024-09-11 16:03:36 +02:00 committed by Marge Bot
parent a061c80629
commit 977f435f4c
4 changed files with 123 additions and 17 deletions

View file

@ -1175,7 +1175,7 @@ handle_lds_direct_valu_hazard_instr(LdsDirectVALUHazardGlobalState& global_state
block_state.num_valu++;
}
if (parse_vdst_wait(instr.get()) == 0)
if (parse_depctr_wait(instr.get()).va_vdst == 0)
return true;
block_state.num_instrs++;
@ -1297,7 +1297,7 @@ handle_valu_partial_forwarding_hazard_instr(VALUPartialForwardingHazardGlobalSta
}
block_state.num_valu_since_read++;
} else if (parse_vdst_wait(instr.get()) == 0) {
} else if (parse_depctr_wait(instr.get()).va_vdst == 0) {
return true;
}
@ -1394,7 +1394,7 @@ handle_instruction_gfx11(State& state, NOP_ctx_gfx11& ctx, aco_ptr<Instruction>&
ctx.has_Vcmpx = false;
}
unsigned va_vdst = parse_vdst_wait(instr.get());
unsigned va_vdst = parse_depctr_wait(instr.get()).va_vdst;
unsigned vm_vsrc = 7;
unsigned sa_sdst = 1;
@ -1404,7 +1404,7 @@ handle_instruction_gfx11(State& state, NOP_ctx_gfx11& ctx, aco_ptr<Instruction>&
vm_vsrc = 0;
sa_sdst = 0;
} else if (instr->opcode == aco_opcode::s_waitcnt_depctr) {
/* va_vdst already obtained through parse_vdst_wait(). */
/* va_vdst already obtained through parse_depctr_wait(). */
vm_vsrc = (instr->salu().imm >> 2) & 0x7;
sa_sdst = instr->salu().imm & 0x1;
} else if (instr->isLDSDIR() && state.program->gfx_level >= GFX12) {
@ -1600,7 +1600,7 @@ handle_instruction_gfx11(State& state, NOP_ctx_gfx11& ctx, aco_ptr<Instruction>&
bool
has_vdst0_since_valu_instr(bool& global_state, unsigned& block_state, aco_ptr<Instruction>& pred)
{
if (parse_vdst_wait(pred.get()) == 0)
if (parse_depctr_wait(pred.get()).va_vdst == 0)
return true;
if (--block_state == 0) {

View file

@ -192,7 +192,7 @@ update_alu(delay_ctx& ctx, bool is_valu, bool is_trans, int cycles)
void
kill_alu(alu_delay_info& delay, Instruction* instr, delay_ctx& ctx)
{
if (parse_vdst_wait(instr) == 0) {
if (parse_depctr_wait(instr).va_vdst == 0) {
std::map<PhysReg, alu_delay_info>::iterator it = ctx.gpr_map.begin();
while (it != ctx.gpr_map.end()) {
alu_delay_info& entry = it->second;

View file

@ -1397,17 +1397,93 @@ get_vmem_type(enum amd_gfx_level gfx_level, Instruction* instr)
return 0;
}
unsigned
parse_vdst_wait(Instruction* instr)
/* Parse implicit data dependency resolution:
* Returns the value of each counter that must be reached
* before an instruction is issued.
*
* (Probably incomplete.)
*/
depctr_wait
parse_depctr_wait(const Instruction* instr)
{
if (instr->isVMEM() || instr->isFlatLike() || instr->isDS() || instr->isEXP())
return 0;
else if (instr->isLDSDIR())
return instr->ldsdir().wait_vdst;
else if (instr->opcode == aco_opcode::s_waitcnt_depctr)
return (instr->salu().imm >> 12) & 0xf;
else
return 15;
depctr_wait res;
if (instr->isVMEM() || instr->isFlatLike() || instr->isDS() || instr->isEXP()) {
res.va_vdst = 0;
res.va_exec = 0;
res.sa_exec = 0;
if (instr->isVMEM() || instr->isFlatLike()) {
res.sa_sdst = 0;
res.va_sdst = 0;
res.va_vcc = 0;
}
} else if (instr->isSMEM()) {
res.sa_sdst = 0;
res.va_sdst = 0;
res.va_vcc = 0;
} else if (instr->isLDSDIR()) {
res.va_vdst = instr->ldsdir().wait_vdst;
res.va_exec = 0;
res.sa_exec = 0;
} else if (instr->opcode == aco_opcode::s_waitcnt_depctr) {
unsigned imm = instr->salu().imm;
res.va_vdst = (imm >> 12) & 0xf;
res.va_sdst = (imm >> 9) & 0x7;
res.va_ssrc = (imm >> 8) & 0x1;
res.hold_cnt = (imm >> 7) & 0x1;
res.vm_vsrc = (imm >> 2) & 0x7;
res.va_vcc = (imm >> 1) & 0x1;
res.sa_sdst = imm & 0x1;
} else if (instr->isVALU()) {
res.sa_exec = 0;
for (const Definition& def : instr->definitions) {
if (def.regClass().type() == RegType::sgpr) {
res.sa_sdst = 0;
/* Notably, this is the only exception, even VALU that
* reads exec doesn't implicitly wait for va_exec.
*/
if (instr->opcode == aco_opcode::v_readfirstlane_b32)
res.va_exec = 0;
break;
}
}
} else if (instr_info.classes[(int)instr->opcode] == instr_class::branch ||
instr_info.classes[(int)instr->opcode] == instr_class::sendmsg) {
res.sa_exec = 0;
res.va_exec = 0;
switch (instr->opcode) {
case aco_opcode::s_cbranch_vccz:
case aco_opcode::s_cbranch_vccnz:
res.va_vcc = 0;
res.sa_sdst = 0;
break;
case aco_opcode::s_cbranch_scc0:
case aco_opcode::s_cbranch_scc1:
res.sa_sdst = 0;
break;
default: break;
}
} else if (instr->isSALU()) {
for (const Definition& def : instr->definitions) {
if (def.physReg() < vcc) {
res.va_sdst = 0;
} else if (def.physReg() <= vcc_hi) {
res.va_vcc = 0;
} else if (def.physReg() == exec || def.physReg() == exec_hi) {
res.va_exec = 0;
}
}
for (const Operand& op : instr->operands) {
if (op.physReg() < vcc) {
res.va_sdst = 0;
} else if (op.physReg() <= vcc_hi) {
res.va_vcc = 0;
} else if (op.physReg() == exec || op.physReg() == exec_hi) {
res.va_exec = 0;
}
}
}
return res;
}
bool

View file

@ -1878,7 +1878,37 @@ enum vmem_type : uint8_t {
*/
uint8_t get_vmem_type(enum amd_gfx_level gfx_level, Instruction* instr);
unsigned parse_vdst_wait(Instruction* instr);
/* Set of dependency counter wait values.
 *
 * A counter holding its maximum value means "no wait". The packed member
 * overlays the bit fields and is initialized with all bits set, so a
 * default-constructed depctr_wait starts with every counter at its maximum.
 *
 * Some of the counters are larger than their bit field, but there is no wait
 * mechanism that allows waiting only for higher values.
 */
struct depctr_wait {
   union {
      struct {
         /* VALU completion; apparently also used for VALU without vgpr writes. */
         unsigned int va_vdst : 4;

         /* VALU sgpr write, excluding vcc/vcc_hi. */
         unsigned int va_sdst : 3;

         /* VALU sgpr read. */
         unsigned int va_ssrc : 1;

         /* Meaning unknown. */
         unsigned int hold_cnt : 1;

         /* VMEM/DS vgpr read. */
         unsigned int vm_vsrc : 3;

         /* VALU write of vcc/vcc_hi. */
         unsigned int va_vcc : 1;

         /* SALU write of an sgpr, vcc/vcc_hi or scc. */
         unsigned int sa_sdst : 1;

         /* VALU write of exec/exec_hi. */
         unsigned int va_exec : 1;

         /* SALU write of exec/exec_hi. */
         unsigned int sa_exec : 1;
      };

      /* All counters viewed as a single word; all bits set by default. */
      unsigned int packed = ~0u;
   };
};
/* Returns, for each depctr counter, the value that must be reached before
 * "instr" is issued. A counter at its maximum value means no wait.
 */
depctr_wait parse_depctr_wait(const Instruction* instr);
enum block_kind {
/* uniform indicates that leaving this block,