aco: insert s_delay_alu on the linear CFG

fossil-db (gfx1100):
Totals from 10498 (7.87% of 133428) affected shaders:
Instrs: 22274711 -> 22277717 (+0.01%); split: -0.01%, +0.03%
CodeSize: 114557040 -> 114569064 (+0.01%); split: -0.01%, +0.02%
Latency: 236505186 -> 236497338 (-0.00%); split: -0.00%, +0.00%
InvThroughput: 33425052 -> 33423876 (-0.00%); split: -0.00%, +0.00%

Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Georg Lehmann <dadschoorse@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/23213>
This commit is contained in:
Rhys Perry 2023-05-23 14:04:41 +01:00 committed by Marge Bot
parent d7f48a61ec
commit 54c0088629

View file

@@ -782,11 +782,15 @@ void
insert_wait_entry(wait_ctx& ctx, Definition def, wait_event event, uint8_t vmem_types = 0,
unsigned cycles = 0)
{
/* We can't safely write to unwritten destination VGPR lanes on GFX11 without waiting for
* the load to finish.
/* We can't safely write to unwritten destination VGPR lanes with DS/VMEM on GFX11 without
* waiting for the load to finish.
* Also, follow linear control flow for ALU because it's unlikely that the hardware does per-lane
* dependency checks.
*/
uint32_t ds_vmem_events = event_lds | event_gds | event_vmem | event_flat;
uint32_t alu_events = event_trans | event_valu | event_salu;
bool force_linear =
ctx.gfx_level >= GFX11 && (event & (event_lds | event_gds | event_vmem | event_flat));
ctx.gfx_level >= GFX11 && (event & (ds_vmem_events | alu_events));
insert_wait_entry(ctx, def.physReg(), def.regClass(), event, true, vmem_types, cycles,
force_linear);