aco: fix update_alu(clear=true) for exports

For:
   v_mov_b32_e32 v0, 1.0
   exp mrtz v0, off, off, off
we should completely remove the ALU entry before creating the EXP's WaR entry for v0.
Otherwise, the two will be combined into a single entry, which makes later
uses of v0 wait for expcnt(0).

gen_alu() should also be called before gen(), since gen_alu() performs the
clear while gen() creates the WaR entry.

fossil-db (gfx1100):
Totals from 3589 (2.69% of 133428) affected shaders:
Instrs: 5591041 -> 5589047 (-0.04%); split: -0.04%, +0.00%
CodeSize: 28580840 -> 28572864 (-0.03%); split: -0.03%, +0.00%
Latency: 65427923 -> 65427543 (-0.00%); split: -0.00%, +0.00%
InvThroughput: 11109079 -> 11109065 (-0.00%); split: -0.00%, +0.00%

Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Georg Lehmann <dadschoorse@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/23213>
This commit is contained in:
Rhys Perry 2023-05-22 16:32:00 +01:00 committed by Marge Bot
parent a63a38aeaf
commit d9cdb3524a

View file

@@ -495,11 +495,12 @@ force_waitcnt(wait_ctx& ctx, wait_imm& imm)
void
update_alu(wait_ctx& ctx, bool is_valu, bool is_trans, bool clear, int cycles)
{
for (std::pair<const PhysReg, wait_entry>& e : ctx.gpr_map) {
wait_entry& entry = e.second;
std::map<PhysReg, wait_entry>::iterator it = ctx.gpr_map.begin();
while (it != ctx.gpr_map.end()) {
wait_entry& entry = it->second;
if (clear) {
entry.delay = alu_delay_info();
entry.remove_counter(counter_alu);
} else {
entry.delay.valu_instrs += is_valu ? 1 : 0;
entry.delay.trans_instrs += is_trans ? 1 : 0;
@@ -508,7 +509,14 @@ update_alu(wait_ctx& ctx, bool is_valu, bool is_trans, bool clear, int cycles)
entry.delay.trans_cycles -= cycles;
entry.delay.fixup();
if (it->second.delay.empty())
entry.remove_counter(counter_alu);
}
if (!entry.counters)
it = ctx.gpr_map.erase(it);
else
it++;
}
}
@@ -992,9 +1000,9 @@ handle_block(Program* program, Block& block, wait_ctx& ctx)
memory_sync_info sync_info = get_sync_info(instr.get());
kill(queued_imm, queued_delay, instr.get(), ctx, sync_info);
gen(instr.get(), ctx);
if (program->gfx_level >= GFX11)
gen_alu(instr.get(), ctx);
gen(instr.get(), ctx);
if (instr->format != Format::PSEUDO_BARRIER && !is_wait && !is_delay_alu) {
if (instr->isVINTERP_INREG() && queued_imm.exp != wait_imm::unset_counter) {