mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-06 05:08:08 +02:00
aco: workaround VALUReadSGPRHazard
fossil-db (gfx1200):
Totals from 65112 (82.01% of 79395) affected shaders:
Instrs: 41732906 -> 42987198 (+3.01%); split: -0.00%, +3.01%
CodeSize: 222451964 -> 226942644 (+2.02%); split: -0.01%, +2.03%
Latency: 290411063 -> 290944688 (+0.18%); split: -0.00%, +0.18%
InvThroughput: 45854913 -> 45910275 (+0.12%); split: -0.00%, +0.12%

Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Georg Lehmann <dadschoorse@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/30478>
This commit is contained in:
parent
9ab0c4b047
commit
47e0f468cf
1 changed files with 84 additions and 7 deletions
|
|
@ -273,6 +273,10 @@ struct NOP_ctx_gfx11 {
|
|||
/* WMMAHazards */
|
||||
std::bitset<256> vgpr_written_by_wmma;
|
||||
|
||||
/* VALUReadSGPRHazard */
|
||||
std::bitset<m0.reg() / 2> sgpr_read_by_valu; /* SGPR pairs, excluding null, exec, m0 and scc */
|
||||
CounterMap<0, m0.reg(), 11> sgpr_read_by_valu_then_wr_by_salu;
|
||||
|
||||
void join(const NOP_ctx_gfx11& other)
|
||||
{
|
||||
has_Vcmpx |= other.has_Vcmpx;
|
||||
|
|
@ -287,6 +291,8 @@ struct NOP_ctx_gfx11 {
|
|||
sgpr_read_by_valu_as_lanemask_then_wr_by_salu |=
|
||||
other.sgpr_read_by_valu_as_lanemask_then_wr_by_salu;
|
||||
vgpr_written_by_wmma |= other.vgpr_written_by_wmma;
|
||||
sgpr_read_by_valu |= other.sgpr_read_by_valu;
|
||||
sgpr_read_by_valu_then_wr_by_salu.join_min(other.sgpr_read_by_valu_then_wr_by_salu);
|
||||
}
|
||||
|
||||
bool operator==(const NOP_ctx_gfx11& other)
|
||||
|
|
@ -302,7 +308,9 @@ struct NOP_ctx_gfx11 {
|
|||
sgpr_read_by_valu_as_lanemask == other.sgpr_read_by_valu_as_lanemask &&
|
||||
sgpr_read_by_valu_as_lanemask_then_wr_by_salu ==
|
||||
other.sgpr_read_by_valu_as_lanemask_then_wr_by_salu &&
|
||||
vgpr_written_by_wmma == other.vgpr_written_by_wmma;
|
||||
vgpr_written_by_wmma == other.vgpr_written_by_wmma &&
|
||||
sgpr_read_by_valu == other.sgpr_read_by_valu &&
|
||||
sgpr_read_by_valu_then_wr_by_salu == other.sgpr_read_by_valu_then_wr_by_salu;
|
||||
}
|
||||
};
|
||||
|
||||
|
|
@ -1514,6 +1522,48 @@ handle_instruction_gfx11(State& state, NOP_ctx_gfx11& ctx, aco_ptr<Instruction>&
|
|||
}
|
||||
}
|
||||
}
|
||||
} else {
|
||||
/* VALUReadSGPRHazard
|
||||
* An SGPR that is read by VALU and later written by SALU cannot safely be read by VALU/SALU.
|
||||
*/
|
||||
if (instr->isVALU() || instr->isSALU()) {
|
||||
unsigned expiry_count = instr->isSALU() ? 10 : 11;
|
||||
for (Operand& op : instr->operands) {
|
||||
if (sa_sdst == 0)
|
||||
break;
|
||||
|
||||
for (unsigned i = 0; i < op.size(); i++) {
|
||||
unsigned reg = op.physReg() + i;
|
||||
if (reg < ctx.sgpr_read_by_valu_then_wr_by_salu.size() &&
|
||||
ctx.sgpr_read_by_valu_then_wr_by_salu.get(reg) < expiry_count) {
|
||||
bld.sopp(aco_opcode::s_waitcnt_depctr, 0xfffe);
|
||||
sa_sdst = 0;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (sa_sdst == 0)
|
||||
ctx.sgpr_read_by_valu_then_wr_by_salu.reset();
|
||||
else if (instr->isSALU() && !instr->isSOPP())
|
||||
ctx.sgpr_read_by_valu_then_wr_by_salu.inc();
|
||||
|
||||
if (instr->isVALU()) {
|
||||
for (const Operand& op : instr->operands) {
|
||||
for (unsigned i = 0; i < DIV_ROUND_UP(op.size(), 2); i++) {
|
||||
unsigned reg = (op.physReg() / 2) + i;
|
||||
if (reg < ctx.sgpr_read_by_valu.size())
|
||||
ctx.sgpr_read_by_valu.set(reg);
|
||||
}
|
||||
}
|
||||
} else if (instr->isSALU() && !instr->definitions.empty()) {
|
||||
for (unsigned i = 0; i < instr->definitions[0].size(); i++) {
|
||||
unsigned def_reg = instr->definitions[0].physReg() + i;
|
||||
if ((def_reg / 2) < ctx.sgpr_read_by_valu.size() && ctx.sgpr_read_by_valu[def_reg / 2])
|
||||
ctx.sgpr_read_by_valu_then_wr_by_salu.set(def_reg);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* LdsDirectVMEMHazard
|
||||
|
|
@ -1670,6 +1720,15 @@ resolve_all_gfx11(State& state, NOP_ctx_gfx11& ctx,
|
|||
}
|
||||
}
|
||||
|
||||
/* VALUReadSGPRHazard */
|
||||
if (state.program->gfx_level >= GFX12) {
|
||||
for (unsigned i = 0; i < ctx.sgpr_read_by_valu_then_wr_by_salu.size(); i++) {
|
||||
if (ctx.sgpr_read_by_valu_then_wr_by_salu.get(i) < 11)
|
||||
waitcnt_depctr &= 0xfffe;
|
||||
}
|
||||
ctx.sgpr_read_by_valu_then_wr_by_salu.reset();
|
||||
}
|
||||
|
||||
/* LdsDirectVMEMHazard */
|
||||
if (ctx.vgpr_used_by_vmem_load.any() || ctx.vgpr_used_by_vmem_store.any() ||
|
||||
ctx.vgpr_used_by_ds.any() || ctx.vgpr_used_by_vmem_sample.any() ||
|
||||
|
|
@ -1745,7 +1804,7 @@ handle_block(Program* program, Ctx& ctx, Block& block)
|
|||
|
||||
template <typename Ctx, HandleInstr<Ctx> Handle, ResolveAll<Ctx> Resolve>
|
||||
void
|
||||
mitigate_hazards(Program* program)
|
||||
mitigate_hazards(Program* program, Ctx initial_ctx = Ctx())
|
||||
{
|
||||
std::vector<Ctx> all_ctx(program->blocks.size());
|
||||
std::stack<unsigned, std::vector<unsigned>> loop_header_indices;
|
||||
|
|
@ -1754,6 +1813,9 @@ mitigate_hazards(Program* program)
|
|||
Block& block = program->blocks[i];
|
||||
Ctx& ctx = all_ctx[i];
|
||||
|
||||
if (i == 0 || (block.kind & block_kind_resume))
|
||||
ctx = initial_ctx;
|
||||
|
||||
if (block.kind & block_kind_loop_header) {
|
||||
loop_header_indices.push(i);
|
||||
} else if (block.kind & block_kind_loop_exit) {
|
||||
|
|
@ -1851,14 +1913,29 @@ required_export_priority(Program* program)
|
|||
void
|
||||
insert_NOPs(Program* program)
|
||||
{
|
||||
if (program->gfx_level >= GFX11)
|
||||
mitigate_hazards<NOP_ctx_gfx11, handle_instruction_gfx11, resolve_all_gfx11>(program);
|
||||
else if (program->gfx_level >= GFX10_3)
|
||||
if (program->gfx_level >= GFX11) {
|
||||
NOP_ctx_gfx11 initial_ctx;
|
||||
|
||||
bool has_previous_part =
|
||||
program->is_epilog || program->info.vs.has_prolog || program->info.ps.has_prolog ||
|
||||
(program->info.merged_shader_compiled_separately && program->stage.sw != SWStage::VS &&
|
||||
program->stage.sw != SWStage::TES) || program->stage == raytracing_cs;
|
||||
if (program->gfx_level >= GFX12 && has_previous_part) {
|
||||
/* resolve_all_gfx11 can't resolve VALUReadSGPRHazard entirely. We have to assume that any
|
||||
* SGPR might have been read by VALU if there was a previous shader part.
|
||||
*/
|
||||
initial_ctx.sgpr_read_by_valu.flip();
|
||||
}
|
||||
|
||||
mitigate_hazards<NOP_ctx_gfx11, handle_instruction_gfx11, resolve_all_gfx11>(program,
|
||||
initial_ctx);
|
||||
} else if (program->gfx_level >= GFX10_3) {
|
||||
; /* no hazards/bugs to mitigate */
|
||||
else if (program->gfx_level >= GFX10)
|
||||
} else if (program->gfx_level >= GFX10) {
|
||||
mitigate_hazards<NOP_ctx_gfx10, handle_instruction_gfx10, resolve_all_gfx10>(program);
|
||||
else
|
||||
} else {
|
||||
mitigate_hazards<NOP_ctx_gfx6, handle_instruction_gfx6, resolve_all_gfx6>(program);
|
||||
}
|
||||
|
||||
if (program->gfx_level == GFX11_5 && (program->stage.hw == AC_HW_NEXT_GEN_GEOMETRY_SHADER ||
|
||||
program->stage.hw == AC_HW_PIXEL_SHADER))
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue