mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-05 16:08:04 +02:00
aco/ir: add function to parse depctr waits
No Foz-DB changes on Navi31.
Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
Reviewed-by: Rhys Perry <pendingchaos02@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/31132>
This commit is contained in:
parent
a061c80629
commit
977f435f4c
4 changed files with 123 additions and 17 deletions
|
|
@ -1175,7 +1175,7 @@ handle_lds_direct_valu_hazard_instr(LdsDirectVALUHazardGlobalState& global_state
|
|||
block_state.num_valu++;
|
||||
}
|
||||
|
||||
if (parse_vdst_wait(instr.get()) == 0)
|
||||
if (parse_depctr_wait(instr.get()).va_vdst == 0)
|
||||
return true;
|
||||
|
||||
block_state.num_instrs++;
|
||||
|
|
@ -1297,7 +1297,7 @@ handle_valu_partial_forwarding_hazard_instr(VALUPartialForwardingHazardGlobalSta
|
|||
}
|
||||
|
||||
block_state.num_valu_since_read++;
|
||||
} else if (parse_vdst_wait(instr.get()) == 0) {
|
||||
} else if (parse_depctr_wait(instr.get()).va_vdst == 0) {
|
||||
return true;
|
||||
}
|
||||
|
||||
|
|
@ -1394,7 +1394,7 @@ handle_instruction_gfx11(State& state, NOP_ctx_gfx11& ctx, aco_ptr<Instruction>&
|
|||
ctx.has_Vcmpx = false;
|
||||
}
|
||||
|
||||
unsigned va_vdst = parse_vdst_wait(instr.get());
|
||||
unsigned va_vdst = parse_depctr_wait(instr.get()).va_vdst;
|
||||
unsigned vm_vsrc = 7;
|
||||
unsigned sa_sdst = 1;
|
||||
|
||||
|
|
@ -1404,7 +1404,7 @@ handle_instruction_gfx11(State& state, NOP_ctx_gfx11& ctx, aco_ptr<Instruction>&
|
|||
vm_vsrc = 0;
|
||||
sa_sdst = 0;
|
||||
} else if (instr->opcode == aco_opcode::s_waitcnt_depctr) {
|
||||
/* va_vdst already obtained through parse_vdst_wait(). */
|
||||
/* va_vdst already obtained through parse_depctr_wait(). */
|
||||
vm_vsrc = (instr->salu().imm >> 2) & 0x7;
|
||||
sa_sdst = instr->salu().imm & 0x1;
|
||||
} else if (instr->isLDSDIR() && state.program->gfx_level >= GFX12) {
|
||||
|
|
@ -1600,7 +1600,7 @@ handle_instruction_gfx11(State& state, NOP_ctx_gfx11& ctx, aco_ptr<Instruction>&
|
|||
bool
|
||||
has_vdst0_since_valu_instr(bool& global_state, unsigned& block_state, aco_ptr<Instruction>& pred)
|
||||
{
|
||||
if (parse_vdst_wait(pred.get()) == 0)
|
||||
if (parse_depctr_wait(pred.get()).va_vdst == 0)
|
||||
return true;
|
||||
|
||||
if (--block_state == 0) {
|
||||
|
|
|
|||
|
|
@ -192,7 +192,7 @@ update_alu(delay_ctx& ctx, bool is_valu, bool is_trans, int cycles)
|
|||
void
|
||||
kill_alu(alu_delay_info& delay, Instruction* instr, delay_ctx& ctx)
|
||||
{
|
||||
if (parse_vdst_wait(instr) == 0) {
|
||||
if (parse_depctr_wait(instr).va_vdst == 0) {
|
||||
std::map<PhysReg, alu_delay_info>::iterator it = ctx.gpr_map.begin();
|
||||
while (it != ctx.gpr_map.end()) {
|
||||
alu_delay_info& entry = it->second;
|
||||
|
|
|
|||
|
|
@ -1397,17 +1397,93 @@ get_vmem_type(enum amd_gfx_level gfx_level, Instruction* instr)
|
|||
return 0;
|
||||
}
|
||||
|
||||
unsigned
|
||||
parse_vdst_wait(Instruction* instr)
|
||||
/* Parse implicit data dependency resolution:
|
||||
* Returns the value of each counter that must be reached
|
||||
* before an instruction is issued.
|
||||
*
|
||||
* (Probably incomplete.)
|
||||
*/
|
||||
depctr_wait
|
||||
parse_depctr_wait(const Instruction* instr)
|
||||
{
|
||||
if (instr->isVMEM() || instr->isFlatLike() || instr->isDS() || instr->isEXP())
|
||||
return 0;
|
||||
else if (instr->isLDSDIR())
|
||||
return instr->ldsdir().wait_vdst;
|
||||
else if (instr->opcode == aco_opcode::s_waitcnt_depctr)
|
||||
return (instr->salu().imm >> 12) & 0xf;
|
||||
else
|
||||
return 15;
|
||||
depctr_wait res;
|
||||
if (instr->isVMEM() || instr->isFlatLike() || instr->isDS() || instr->isEXP()) {
|
||||
res.va_vdst = 0;
|
||||
res.va_exec = 0;
|
||||
res.sa_exec = 0;
|
||||
if (instr->isVMEM() || instr->isFlatLike()) {
|
||||
res.sa_sdst = 0;
|
||||
res.va_sdst = 0;
|
||||
res.va_vcc = 0;
|
||||
}
|
||||
} else if (instr->isSMEM()) {
|
||||
res.sa_sdst = 0;
|
||||
res.va_sdst = 0;
|
||||
res.va_vcc = 0;
|
||||
} else if (instr->isLDSDIR()) {
|
||||
res.va_vdst = instr->ldsdir().wait_vdst;
|
||||
res.va_exec = 0;
|
||||
res.sa_exec = 0;
|
||||
} else if (instr->opcode == aco_opcode::s_waitcnt_depctr) {
|
||||
unsigned imm = instr->salu().imm;
|
||||
res.va_vdst = (imm >> 12) & 0xf;
|
||||
res.va_sdst = (imm >> 9) & 0x7;
|
||||
res.va_ssrc = (imm >> 8) & 0x1;
|
||||
res.hold_cnt = (imm >> 7) & 0x1;
|
||||
res.vm_vsrc = (imm >> 2) & 0x7;
|
||||
res.va_vcc = (imm >> 1) & 0x1;
|
||||
res.sa_sdst = imm & 0x1;
|
||||
} else if (instr->isVALU()) {
|
||||
res.sa_exec = 0;
|
||||
for (const Definition& def : instr->definitions) {
|
||||
if (def.regClass().type() == RegType::sgpr) {
|
||||
res.sa_sdst = 0;
|
||||
/* Notably, this is the only exception, even VALU that
|
||||
* reads exec doesn't implicitly wait for va_exec.
|
||||
*/
|
||||
if (instr->opcode == aco_opcode::v_readfirstlane_b32)
|
||||
res.va_exec = 0;
|
||||
break;
|
||||
}
|
||||
}
|
||||
} else if (instr_info.classes[(int)instr->opcode] == instr_class::branch ||
|
||||
instr_info.classes[(int)instr->opcode] == instr_class::sendmsg) {
|
||||
res.sa_exec = 0;
|
||||
res.va_exec = 0;
|
||||
switch (instr->opcode) {
|
||||
case aco_opcode::s_cbranch_vccz:
|
||||
case aco_opcode::s_cbranch_vccnz:
|
||||
res.va_vcc = 0;
|
||||
res.sa_sdst = 0;
|
||||
break;
|
||||
case aco_opcode::s_cbranch_scc0:
|
||||
case aco_opcode::s_cbranch_scc1:
|
||||
res.sa_sdst = 0;
|
||||
break;
|
||||
default: break;
|
||||
}
|
||||
} else if (instr->isSALU()) {
|
||||
for (const Definition& def : instr->definitions) {
|
||||
if (def.physReg() < vcc) {
|
||||
res.va_sdst = 0;
|
||||
} else if (def.physReg() <= vcc_hi) {
|
||||
res.va_vcc = 0;
|
||||
} else if (def.physReg() == exec || def.physReg() == exec_hi) {
|
||||
res.va_exec = 0;
|
||||
}
|
||||
}
|
||||
for (const Operand& op : instr->operands) {
|
||||
if (op.physReg() < vcc) {
|
||||
res.va_sdst = 0;
|
||||
} else if (op.physReg() <= vcc_hi) {
|
||||
res.va_vcc = 0;
|
||||
} else if (op.physReg() == exec || op.physReg() == exec_hi) {
|
||||
res.va_exec = 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return res;
|
||||
}
|
||||
|
||||
bool
|
||||
|
|
|
|||
|
|
@ -1878,7 +1878,37 @@ enum vmem_type : uint8_t {
|
|||
*/
|
||||
uint8_t get_vmem_type(enum amd_gfx_level gfx_level, Instruction* instr);
|
||||
|
||||
unsigned parse_vdst_wait(Instruction* instr);
|
||||
/* Wait state of the dependency counters.
 *
 * For every counter, the maximum value means "no wait"; a default-constructed
 * depctr_wait has all bits set and therefore waits for nothing.
 * Some of the counters are larger than their bit field, but there is no wait
 * mechanism that allows waiting only for the higher values.
 */
struct depctr_wait {
   union {
      struct {
         /* VALU completion (apparently also counts VALU without vgpr writes). */
         unsigned va_vdst : 4;
         /* SGPR written by VALU, excluding vcc/vcc_hi. */
         unsigned va_sdst : 3;
         /* SGPR read by VALU. */
         unsigned va_ssrc : 1;
         /* Purpose unknown. */
         unsigned hold_cnt : 1;
         /* VGPR read by VMEM/DS. */
         unsigned vm_vsrc : 3;
         /* vcc/vcc_hi written by VALU. */
         unsigned va_vcc : 1;
         /* SGPR, vcc/vcc_hi or scc written by SALU. */
         unsigned sa_sdst : 1;
         /* exec/exec_hi written by VALU. */
         unsigned va_exec : 1;
         /* exec/exec_hi written by SALU. */
         unsigned sa_exec : 1;
      };
      /* All counters viewed as a single word; all-ones puts every field at
       * its maximum.
       */
      unsigned packed = ~0u;
   };
};
|
||||
|
||||
depctr_wait parse_depctr_wait(const Instruction* instr);
|
||||
|
||||
enum block_kind {
|
||||
/* uniform indicates that leaving this block,
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue