aco/gfx10: Mitigate VcmpxPermlaneHazard.

Any permlane instruction that follows any VOPC instruction can cause a hazard,
this commit implements a workaround that avoids this causing a problem.

Signed-off-by: Timur Kristóf <timur.kristof@gmail.com>
Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
This commit is contained in:
Timur Kristóf 2019-10-23 11:24:53 +02:00
parent 99aed688d3
commit e5a8616973
2 changed files with 28 additions and 0 deletions

View file

@ -173,3 +173,11 @@ The 12-bit immediate OFFSET field of FLAT instructions must always be 0.
GLOBAL and SCRATCH are unaffected.
ACO doesn't use FLAT load/store on GFX10, so is unaffected.
### VcmpxPermlaneHazard
Triggered by:
Any permlane instruction that follows any VOPC instruction.
Confirmed by AMD devs that despite the name, this doesn't only affect v_cmpx.
Mitigated by: any VALU instruction except `v_nop`.

View file

@ -39,6 +39,7 @@ struct NOP_ctx {
/* GFX10 */
int last_VMEM_since_scalar_write = -1;
bool has_VOPC = false;
NOP_ctx(Program* program) : chip_class(program->chip_class) {
vcc_physical = program->config->num_sgprs - 2;
@ -283,6 +284,25 @@ std::pair<int, int> handle_instruction_gfx10(NOP_ctx& ctx, aco_ptr<Instruction>&
ctx.last_VMEM_since_scalar_write = -1;
}
/* VcmpxPermlaneHazard
* Handle any permlane following a VOPC instruction, insert v_mov between them.
*/
if (instr->format == Format::VOPC) {
ctx.has_VOPC = true;
} else if (ctx.has_VOPC &&
(instr->opcode == aco_opcode::v_permlane16_b32 ||
instr->opcode == aco_opcode::v_permlanex16_b32)) {
ctx.has_VOPC = false;
/* v_nop would be discarded by SQ, so use v_mov with the first operand of the permlane */
aco_ptr<VOP1_instruction> v_mov{create_instruction<VOP1_instruction>(aco_opcode::v_mov_b32, Format::VOP1, 1, 1)};
v_mov->definitions[0] = Definition(instr->operands[0].physReg(), v1);
v_mov->operands[0] = Operand(instr->operands[0].physReg(), v1);
new_instructions.emplace_back(std::move(v_mov));
} else if (instr->isVALU() && instr->opcode != aco_opcode::v_nop) {
ctx.has_VOPC = false;
}
return std::make_pair(sNOPs, vNOPs);
}