mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-27 10:30:08 +01:00
aco/gfx10_3: work around NSA hazard
4+ dword NSA can hang if exec becomes non-zero again directly before the instruction.

Foz-DB Navi21:
Totals from 608 (0.74% of 82161) affected shaders:
Instrs: 945138 -> 946431 (+0.14%)
CodeSize: 5171580 -> 5176864 (+0.10%)
Latency: 13356895 -> 13357113 (+0.00%)
InvThroughput: 3043234 -> 3043236 (+0.00%); split: -0.00%, +0.00%

Closes: https://gitlab.freedesktop.org/mesa/mesa/-/issues/9852
Closes: https://gitlab.freedesktop.org/mesa/mesa/-/issues/13981
Cc: mesa-stable
Reviewed-by: Timur Kristóf <timur.kristof@gmail.com>
Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/38215>
This commit is contained in:
parent
bcb6e6b6e6
commit
b2172467d1
2 changed files with 33 additions and 5 deletions
|
|
@ -338,6 +338,17 @@ Only `s_waitcnt_vscnt null, 0`. Needed even if the first instruction is a load.
|
|||
NSA MIMG instructions should be limited to 3 dwords before GFX10.3 to avoid
|
||||
stability issues: https://reviews.llvm.org/D103348
|
||||
|
||||
## RDNA2 / GFX10.3 hazards
|
||||
|
||||
### SALU EXEC write followed by NSA MIMG instruction
|
||||
|
||||
Triggered-by:
|
||||
Potential stability issues can occur if an SALU instruction changes exec from 0
|
||||
to non-zero immediately before an NSA MIMG instruction with 4+ dwords.
|
||||
|
||||
Mitigated-by: Any instruction, including `s_nop`.
|
||||
|
||||
|
||||
## RDNA3 / GFX11 hazards
|
||||
|
||||
### VcmpxPermlaneHazard
|
||||
|
|
|
|||
|
|
@ -129,6 +129,7 @@ struct NOP_ctx_gfx10 {
|
|||
bool has_branch_after_DS = false;
|
||||
bool has_NSA_MIMG = false;
|
||||
bool has_writelane = false;
|
||||
bool has_salu_exec_write = false;
|
||||
std::bitset<128> sgprs_read_by_VMEM;
|
||||
std::bitset<128> sgprs_read_by_VMEM_store;
|
||||
std::bitset<128> sgprs_read_by_DS;
|
||||
|
|
@ -145,6 +146,7 @@ struct NOP_ctx_gfx10 {
|
|||
has_branch_after_DS |= other.has_branch_after_DS;
|
||||
has_NSA_MIMG |= other.has_NSA_MIMG;
|
||||
has_writelane |= other.has_writelane;
|
||||
has_salu_exec_write |= other.has_salu_exec_write;
|
||||
sgprs_read_by_VMEM |= other.sgprs_read_by_VMEM;
|
||||
sgprs_read_by_DS |= other.sgprs_read_by_DS;
|
||||
sgprs_read_by_VMEM_store |= other.sgprs_read_by_VMEM_store;
|
||||
|
|
@ -159,6 +161,7 @@ struct NOP_ctx_gfx10 {
|
|||
has_branch_after_VMEM == other.has_branch_after_VMEM && has_DS == other.has_DS &&
|
||||
has_branch_after_DS == other.has_branch_after_DS &&
|
||||
has_NSA_MIMG == other.has_NSA_MIMG && has_writelane == other.has_writelane &&
|
||||
has_salu_exec_write == other.has_salu_exec_write &&
|
||||
sgprs_read_by_VMEM == other.sgprs_read_by_VMEM &&
|
||||
sgprs_read_by_DS == other.sgprs_read_by_DS &&
|
||||
sgprs_read_by_VMEM_store == other.sgprs_read_by_VMEM_store &&
|
||||
|
|
@ -907,6 +910,15 @@ handle_instruction_gfx10(State& state, NOP_ctx_gfx10& ctx, aco_ptr<Instruction>&
|
|||
ctx.waits_since_fp_atomic = std::min(ctx.waits_since_fp_atomic, 3);
|
||||
}
|
||||
|
||||
/* 4+ dword NSA can hang if exec becomes non-zero again directly before the instruction. */
|
||||
if (instr->isSALU() && instr->writes_exec()) {
|
||||
ctx.has_salu_exec_write = true;
|
||||
} else if (ctx.has_salu_exec_write) {
|
||||
ctx.has_salu_exec_write = false;
|
||||
if (instr->isMIMG() && get_mimg_nsa_dwords(instr.get()) > 1)
|
||||
bld.sopp(aco_opcode::s_nop, 0);
|
||||
}
|
||||
|
||||
if (state.program->gfx_level != GFX10)
|
||||
return; /* no other hazards/bugs to mitigate */
|
||||
|
||||
|
|
@ -2019,13 +2031,15 @@ required_export_priority(Program* program)
|
|||
void
|
||||
insert_NOPs(Program* program)
|
||||
{
|
||||
bool has_previous_part =
|
||||
program->is_epilog || program->info.vs.has_prolog || program->info.ps.has_prolog ||
|
||||
(program->info.merged_shader_compiled_separately && program->stage.sw != SWStage::VS &&
|
||||
program->stage.sw != SWStage::TES) ||
|
||||
program->stage == raytracing_cs;
|
||||
|
||||
if (program->gfx_level >= GFX11) {
|
||||
NOP_ctx_gfx11 initial_ctx;
|
||||
|
||||
bool has_previous_part =
|
||||
program->is_epilog || program->info.vs.has_prolog || program->info.ps.has_prolog ||
|
||||
(program->info.merged_shader_compiled_separately && program->stage.sw != SWStage::VS &&
|
||||
program->stage.sw != SWStage::TES) || program->stage == raytracing_cs;
|
||||
if (program->gfx_level >= GFX12 && has_previous_part) {
|
||||
/* resolve_all_gfx11 can't resolve VALUReadSGPRHazard entirely. We have to assume that any
|
||||
* SGPR might have been read by VALU if there was a previous shader part.
|
||||
|
|
@ -2036,7 +2050,10 @@ insert_NOPs(Program* program)
|
|||
mitigate_hazards<NOP_ctx_gfx11, handle_instruction_gfx11, resolve_all_gfx11>(program,
|
||||
initial_ctx);
|
||||
} else if (program->gfx_level >= GFX10) {
|
||||
mitigate_hazards<NOP_ctx_gfx10, handle_instruction_gfx10, resolve_all_gfx10>(program);
|
||||
NOP_ctx_gfx10 initial_ctx;
|
||||
initial_ctx.has_salu_exec_write = has_previous_part;
|
||||
mitigate_hazards<NOP_ctx_gfx10, handle_instruction_gfx10, resolve_all_gfx10>(program,
|
||||
initial_ctx);
|
||||
} else {
|
||||
mitigate_hazards<NOP_ctx_gfx6, handle_instruction_gfx6, resolve_all_gfx6>(program);
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue