mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-04 22:49:13 +02:00
aco: fix a hazard with v_interp_* and v_{read,readfirst}lane_* on GFX6
It's required to insert 1 wait state if the dst VGPR of any v_interp_* is followed by a read with v_readfirstlane or v_readlane to fix GPU hangs on GFX6. Note that v_writelane_* is apparently not affected. This hazard isn't documented anywhere but AMD confirmed it. This fixes a GPU hang with the texturemipmapgen Sascha demo on GFX6. Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com> Reviewed-by: Daniel Schürmann <daniel@schuermann.dev> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/merge_requests/3533>
This commit is contained in:
parent
b9cc50fbce
commit
1ac49ba908
2 changed files with 27 additions and 0 deletions
|
|
@ -131,6 +131,15 @@ finish and then write to vcc (for example, `s_mov_b64 vcc, vcc`) to correct vccz
|
|||
|
||||
Currently, we don't do this.
|
||||
|
||||
## GCN / GFX6 hazards
|
||||
|
||||
### VINTRP followed by a read with v_readfirstlane or v_readlane
|
||||
|
||||
It's required to insert 1 wait state if the dst VGPR of any v_interp_* is
|
||||
followed by a read with v_readfirstlane or v_readlane to fix GPU hangs on GFX6.
|
||||
Note that v_writelane_* is apparently not affected. This hazard isn't
|
||||
documented anywhere but AMD confirmed it.
|
||||
|
||||
## RDNA / GFX10 hazards
|
||||
|
||||
### SMEM store followed by a load with the same address
|
||||
|
|
|
|||
|
|
@ -353,6 +353,24 @@ int handle_instruction_gfx8_9(NOP_ctx_gfx8_9& ctx, aco_ptr<Instruction>& instr,
|
|||
ctx.VALU_wrsgpr = NOPs ? new_idx : new_idx + 1;
|
||||
}
|
||||
}
|
||||
|
||||
/* It's required to insert 1 wait state if the dst VGPR of any v_interp_*
|
||||
* is followed by a read with v_readfirstlane or v_readlane to fix GPU
|
||||
* hangs on GFX6. Note that v_writelane_* is apparently not affected.
|
||||
* This hazard isn't documented anywhere but AMD confirmed that hazard.
|
||||
*/
|
||||
if (ctx.chip_class == GFX6 &&
|
||||
!new_instructions.empty() &&
|
||||
(instr->opcode == aco_opcode::v_readfirstlane_b32 ||
|
||||
instr->opcode == aco_opcode::v_readlane_b32)) {
|
||||
aco_ptr<Instruction>& pred = new_instructions.back();
|
||||
if (pred->format == Format::VINTRP) {
|
||||
Definition pred_def = pred->definitions[0];
|
||||
Operand& op = instr->operands[0];
|
||||
if (regs_intersect(pred_def.physReg(), pred_def.size(), op.physReg(), op.size()))
|
||||
NOPs = std::max(NOPs, 1);
|
||||
}
|
||||
}
|
||||
return NOPs;
|
||||
} else if (instr->isVMEM() && ctx.VALU_wrsgpr + 5 >= new_idx) {
|
||||
/* If the VALU writes the SGPR that is used by a VMEM, the user must add five wait states. */
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue