aco: forget valu delays after certain s_waitcnt_depctr/LDSDIR

fossil-db (navi31):
Totals from 55242 (69.58% of 79395) affected shaders:
Instrs: 40507666 -> 40138006 (-0.91%); split: -0.91%, +0.00%
CodeSize: 212516104 -> 211025880 (-0.70%); split: -0.70%, +0.00%
Latency: 281643258 -> 281628053 (-0.01%); split: -0.01%, +0.00%
InvThroughput: 46370668 -> 46369637 (-0.00%); split: -0.00%, +0.00%

Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Georg Lehmann <dadschoorse@gmail.com>
Acked-by: Daniel Schürmann <daniel@schuermann.dev>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/23337>
This commit is contained in:
Rhys Perry 2024-08-05 12:30:06 +01:00 committed by Marge Bot
parent 30396ba604
commit 7b92e11e16
4 changed files with 29 additions and 23 deletions

View file

@ -1136,19 +1136,6 @@ test_vgpr_bitset(std::bitset<256>& set, Operand op)
}
/* GFX11 */
/* Computes the vdst wait value that an instruction provides.
 *
 * Yields 0 for VMEM, flat-like, DS and export instructions, the wait_vdst
 * field for LDSDIR instructions, bits 12..15 of the immediate for
 * s_waitcnt_depctr, and 15 for every other instruction.
 */
unsigned
parse_vdst_wait(aco_ptr<Instruction>& instr)
{
   const bool is_zero_wait =
      instr->isVMEM() || instr->isFlatLike() || instr->isDS() || instr->isEXP();
   if (is_zero_wait)
      return 0;

   /* LDSDIR carries the wait count in a dedicated field. */
   if (instr->isLDSDIR())
      return instr->ldsdir().wait_vdst;

   /* Anything that is not s_waitcnt_depctr reports the value 15. */
   if (instr->opcode != aco_opcode::s_waitcnt_depctr)
      return 15;

   /* For s_waitcnt_depctr the count is the 4-bit field starting at bit 12. */
   return (instr->salu().imm >> 12) & 0xf;
}
struct LdsDirectVALUHazardGlobalState {
unsigned wait_vdst = 15;
PhysReg vgpr;
@ -1188,7 +1175,7 @@ handle_lds_direct_valu_hazard_instr(LdsDirectVALUHazardGlobalState& global_state
block_state.num_valu++;
}
if (parse_vdst_wait(instr) == 0)
if (parse_vdst_wait(instr.get()) == 0)
return true;
block_state.num_instrs++;
@ -1310,7 +1297,7 @@ handle_valu_partial_forwarding_hazard_instr(VALUPartialForwardingHazardGlobalSta
}
block_state.num_valu_since_read++;
} else if (parse_vdst_wait(instr) == 0) {
} else if (parse_vdst_wait(instr.get()) == 0) {
return true;
}
@ -1407,7 +1394,7 @@ handle_instruction_gfx11(State& state, NOP_ctx_gfx11& ctx, aco_ptr<Instruction>&
ctx.has_Vcmpx = false;
}
unsigned va_vdst = parse_vdst_wait(instr);
unsigned va_vdst = parse_vdst_wait(instr.get());
unsigned vm_vsrc = 7;
unsigned sa_sdst = 1;
@ -1608,7 +1595,7 @@ handle_instruction_gfx11(State& state, NOP_ctx_gfx11& ctx, aco_ptr<Instruction>&
bool
has_vdst0_since_valu_instr(bool& global_state, unsigned& block_state, aco_ptr<Instruction>& pred)
{
if (parse_vdst_wait(pred) == 0)
if (parse_vdst_wait(pred.get()) == 0)
return true;
if (--block_state == 0) {

View file

@ -192,6 +192,16 @@ update_alu(delay_ctx& ctx, bool is_valu, bool is_trans, int cycles)
void
kill_alu(alu_delay_info& delay, Instruction* instr, delay_ctx& ctx)
{
if (parse_vdst_wait(instr) == 0) {
std::map<PhysReg, alu_delay_info>::iterator it = ctx.gpr_map.begin();
while (it != ctx.gpr_map.end()) {
alu_delay_info& entry = it->second;
entry.valu_instrs = alu_delay_info::valu_nop;
entry.trans_instrs = alu_delay_info::trans_nop;
it = it->second.fixup() ? ctx.gpr_map.erase(it) : std::next(it);
}
}
if (instr->isVALU() || instr->isSALU())
check_alu(ctx, delay, instr);
@ -213,12 +223,6 @@ kill_alu(alu_delay_info& delay, Instruction* instr, delay_ctx& ctx)
void
gen_alu(Instruction* instr, delay_ctx& ctx)
{
if (instr->isEXP() || instr->isDS() || instr->isMIMG() || instr->isFlatLike() ||
instr->isMUBUF() || instr->isMTBUF()) {
ctx.gpr_map.clear();
return;
}
Instruction_cycle_info cycle_info = get_cycle_info(*ctx.program, *instr);
bool is_valu = instr->isVALU();
bool is_trans = instr->isTrans();

View file

@ -1398,6 +1398,19 @@ get_vmem_type(enum amd_gfx_level gfx_level, Instruction* instr)
return 0;
}
/* Computes the vdst wait value that an instruction provides.
 *
 * The result is 15 unless one of the following applies (checked in order):
 *  - VMEM, flat-like, DS or export instruction: 0
 *  - LDSDIR instruction: its wait_vdst field
 *  - s_waitcnt_depctr: bits 12..15 of the immediate
 */
unsigned
parse_vdst_wait(Instruction* instr)
{
   unsigned wait = 15;

   if (instr->isVMEM() || instr->isFlatLike() || instr->isDS() || instr->isEXP()) {
      wait = 0;
   } else if (instr->isLDSDIR()) {
      wait = instr->ldsdir().wait_vdst;
   } else if (instr->opcode == aco_opcode::s_waitcnt_depctr) {
      /* Extract the 4-bit field starting at bit 12 of the immediate. */
      wait = (instr->salu().imm >> 12) & 0xf;
   }

   return wait;
}
bool
dealloc_vgprs(Program* program)
{

View file

@ -1853,6 +1853,8 @@ enum vmem_type : uint8_t {
*/
uint8_t get_vmem_type(enum amd_gfx_level gfx_level, Instruction* instr);
/* Returns the vdst wait value provided by instr: 0 for VMEM, flat-like, DS
 * and export instructions, the wait_vdst field for LDSDIR, bits 12..15 of
 * the immediate for s_waitcnt_depctr, and 15 for anything else.
 */
unsigned parse_vdst_wait(Instruction* instr);
enum block_kind {
/* uniform indicates that leaving this block,
* all actives lanes stay active */