aco/gfx11.5: workaround export priority issue

Corresponding LLVM change (FeatureRequiredExportPriority): https://github.com/llvm/llvm-project/pull/99273

fossil-db (gfx1150):
Totals from 73996 (93.20% of 79395) affected shaders:
Instrs: 36015357 -> 36807177 (+2.20%)
CodeSize: 189072544 -> 192238748 (+1.67%)
Latency: 245845181 -> 246790550 (+0.38%); split: -0.00%, +0.38%
InvThroughput: 45068018 -> 45116177 (+0.11%); split: -0.00%, +0.11%

Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
Backport-to: 24.2
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/30241>
(cherry picked from commit 0919ce1ac4)
This commit is contained in:
Rhys Perry 2024-07-17 12:28:52 +01:00 committed by Eric Engestrom
parent f6ba6a5205
commit dbb7731a90
5 changed files with 74 additions and 2 deletions

View file

@ -234,7 +234,7 @@
"description": "aco/gfx11.5: workaround export priority issue",
"nominated": true,
"nomination_type": 4,
"resolution": 0,
"resolution": 1,
"main_sha": null,
"because_sha": null,
"notes": null

View file

@ -1772,6 +1772,70 @@ mitigate_hazards(Program* program)
}
}
/* Export priority workaround (FeatureRequiredExportPriority in LLVM).
 *
 * Rewrites every block of the program so that:
 *  - the first block starts with s_setprio >= 2 (unless this program is a callee),
 *  - every existing s_setprio has its immediate raised to at least 2,
 *  - every run of consecutive export instructions is followed by a sequence that drops the
 *    priority to 0 and, unless the program ends right after, restores it to 2.
 */
void
required_export_priority(Program* program)
{
   /* Callees (epilogs, a VS that has a prolog, or a separately compiled merged shader part that
    * is neither VS nor TES) are skipped, assuming that the caller has already increased the
    * priority. Prologs always increase it.
    */
   const bool is_callee = program->is_epilog || program->info.vs.has_prolog ||
                          (program->info.merged_shader_compiled_separately &&
                           program->stage.sw != SWStage::VS && program->stage.sw != SWStage::TES);
   const bool raise_at_entry = program->is_prolog || !is_callee;

   for (Block& block : program->blocks) {
      std::vector<aco_ptr<Instruction>> rewritten;
      rewritten.reserve(block.instructions.size() + 6);

      Builder bld(program, &rewritten);

      if (raise_at_entry && block.index == 0) {
         const bool starts_with_setprio = !block.instructions.empty() &&
                                          block.instructions[0]->opcode == aco_opcode::s_setprio;
         if (starts_with_setprio) {
            /* Reuse the existing s_setprio instead of emitting a second one. */
            block.instructions[0]->salu().imm = MAX2(block.instructions[0]->salu().imm, 2);
         } else {
            bld.sopp(aco_opcode::s_setprio, 2);
         }
      }

      const unsigned num_instrs = block.instructions.size();
      for (unsigned idx = 0; idx < num_instrs; idx++) {
         /* Keep a raw pointer: ownership moves into the new list right away. */
         Instruction* cur = block.instructions[idx].get();
         rewritten.push_back(std::move(block.instructions[idx]));

         if (cur->opcode == aco_opcode::s_setprio) {
            cur->salu().imm = MAX2(cur->salu().imm, 2);
            continue;
         }

         /* Only the last export of a run of consecutive exports needs the sequence below. */
         if (!cur->isEXP())
            continue;

         const bool last_in_block = idx + 1 == num_instrs;
         if (!last_in_block && block.instructions[idx + 1]->isEXP())
            continue;

         bool before_endpgm = false;
         if (last_in_block) {
            /* Does this fall through to an s_endpgm? Look at the first non-empty block after
             * this one and check whether it consists of nothing but s_endpgm.
             */
            for (unsigned next = block.index + 1; next < program->blocks.size(); next++) {
               const auto& next_instrs = program->blocks[next].instructions;
               if (next_instrs.empty())
                  continue;

               before_endpgm =
                  next_instrs.size() == 1 && next_instrs[0]->opcode == aco_opcode::s_endpgm;
               break;
            }
         } else {
            before_endpgm = block.instructions[idx + 1]->opcode == aco_opcode::s_endpgm;
         }

         bld.sopp(aco_opcode::s_setprio, 0);
         if (before_endpgm) {
            /* The program ends here: no export wait and no need to restore the priority. */
            bld.sopp(aco_opcode::s_nop, 0);
            bld.sopp(aco_opcode::s_nop, 0);
         } else {
            bld.sopk(aco_opcode::s_waitcnt_expcnt, Operand(sgpr_null, s1), 0);
            bld.sopp(aco_opcode::s_nop, 0);
            bld.sopp(aco_opcode::s_nop, 0);
            bld.sopp(aco_opcode::s_setprio, 2);
         }
      }

      block.instructions = std::move(rewritten);
   }
}
} /* end namespace */
void
@ -1785,6 +1849,10 @@ insert_NOPs(Program* program)
mitigate_hazards<NOP_ctx_gfx10, handle_instruction_gfx10, resolve_all_gfx10>(program);
else
mitigate_hazards<NOP_ctx_gfx6, handle_instruction_gfx6, resolve_all_gfx6>(program);
if (program->gfx_level == GFX11_5 && (program->stage.hw == AC_HW_NEXT_GEN_GEOMETRY_SHADER ||
program->stage.hw == AC_HW_PIXEL_SHADER))
required_export_priority(program);
}
} // namespace aco

View file

@ -12912,7 +12912,9 @@ select_vs_prolog(Program* program, const struct aco_vs_prolog_info* pinfo, ac_sh
block->instructions.reserve(16 + pinfo->num_attributes * 4);
bld.sopp(aco_opcode::s_setprio, 0x3u);
/* Besides improving performance, raising the priority here is also part of the workaround for
 * the FeatureRequiredExportPriority issue on GFX11.5. */
bld.sopp(aco_opcode::s_setprio, 3);
uint32_t attrib_mask = BITFIELD_MASK(pinfo->num_attributes);
bool has_nontrivial_divisors = pinfo->nontrivial_divisors;

View file

@ -222,6 +222,7 @@ aco_compile_shader_part(const struct aco_compiler_options* options,
program->debug.private_data = options->debug.private_data;
program->is_prolog = is_prolog;
program->is_epilog = !is_prolog;
/* Instruction selection */
select_shader_part(program.get(), pinfo, &config, options, info, args);

View file

@ -2035,6 +2035,7 @@ public:
bool has_pops_overlapped_waves_wait = false;
bool has_color_exports = false;
bool is_prolog = false;
bool is_epilog = false;
std::vector<uint8_t> constant_data;
Temp private_segment_buffer;