aco: use pass_flags to recover s_delay_alu cycles

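Store the VALU and trans cycle counts in the s_delay_alu instruction's
pass_flags when it is emitted, and read them back when it is parsed, instead
of recovering them from the gpr_map entries. This is simpler and more accurate.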

fossil-db (gfx1100):
Totals from 11678 (8.75% of 133428) affected shaders:
Instrs: 25448655 -> 25436028 (-0.05%)
CodeSize: 130364728 -> 130314220 (-0.04%)
Latency: 325247603 -> 325231064 (-0.01%); split: -0.01%, +0.00%
InvThroughput: 45901166 -> 45900022 (-0.00%); split: -0.00%, +0.00%

Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Georg Lehmann <dadschoorse@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/23213>
This commit is contained in:
Rhys Perry 2023-05-22 17:44:32 +01:00 committed by Marge Bot
parent d9cdb3524a
commit d7f48a61ec

View file

@@ -434,16 +434,8 @@ parse_delay_alu(wait_ctx& ctx, alu_delay_info& delay, Instruction* instr)
delay.salu_cycles = imm[i] - (uint32_t)alu_delay_wait::SALU_CYCLE_1 + 1;
}
for (std::pair<const PhysReg, wait_entry>& e : ctx.gpr_map) {
wait_entry& entry = e.second;
if (delay.valu_instrs <= entry.delay.valu_instrs)
delay.valu_cycles = std::max(delay.valu_cycles, entry.delay.valu_cycles);
if (delay.trans_instrs <= entry.delay.trans_instrs)
delay.trans_cycles = std::max(delay.trans_cycles, entry.delay.trans_cycles);
if (delay.salu_cycles <= entry.delay.salu_cycles)
delay.salu_cycles = std::max(delay.salu_cycles, entry.delay.salu_cycles);
}
delay.valu_cycles = instr->pass_flags & 0xffff;
delay.trans_cycles = instr->pass_flags >> 16;
return true;
}
@@ -981,6 +973,7 @@ emit_delay_alu(wait_ctx& ctx, std::vector<aco_ptr<Instruction>>& instructions,
create_instruction<SOPP_instruction>(aco_opcode::s_delay_alu, Format::SOPP, 0, 0);
inst->imm = imm;
inst->block = -1;
inst->pass_flags = (delay.valu_cycles | (delay.trans_cycles << 16));
instructions.emplace_back(inst);
delay = alu_delay_info();
}