mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-05 05:18:08 +02:00
aco/gfx11.5: workaround export priority issue
https://github.com/llvm/llvm-project/pull/99273
fossil-db (gfx1150):
Totals from 73996 (93.20% of 79395) affected shaders:
Instrs: 36015357 -> 36807177 (+2.20%)
CodeSize: 189072544 -> 192238748 (+1.67%)
Latency: 245845181 -> 246790550 (+0.38%); split: -0.00%, +0.38%
InvThroughput: 45068018 -> 45116177 (+0.11%); split: -0.00%, +0.11%
Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
Backport-to: 24.2
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/30241>
(cherry picked from commit 0919ce1ac4)
This commit is contained in:
parent
f6ba6a5205
commit
dbb7731a90
5 changed files with 74 additions and 2 deletions
|
|
@ -234,7 +234,7 @@
|
|||
"description": "aco/gfx11.5: workaround export priority issue",
|
||||
"nominated": true,
|
||||
"nomination_type": 4,
|
||||
"resolution": 0,
|
||||
"resolution": 1,
|
||||
"main_sha": null,
|
||||
"because_sha": null,
|
||||
"notes": null
|
||||
|
|
|
|||
|
|
@ -1772,6 +1772,70 @@ mitigate_hazards(Program* program)
|
|||
}
|
||||
}
|
||||
|
||||
/* FeatureRequiredExportPriority in LLVM */
|
||||
void
|
||||
required_export_priority(Program* program)
|
||||
{
|
||||
/* Skip callees, assuming that the caller has already increased the priority. */
|
||||
bool increase_priority = !program->is_epilog && !program->info.vs.has_prolog &&
|
||||
(!program->info.merged_shader_compiled_separately ||
|
||||
program->stage.sw == SWStage::VS || program->stage.sw == SWStage::TES);
|
||||
increase_priority |= program->is_prolog;
|
||||
|
||||
for (Block& block : program->blocks) {
|
||||
std::vector<aco_ptr<Instruction>> new_instructions;
|
||||
new_instructions.reserve(block.instructions.size() + 6);
|
||||
|
||||
Builder bld(program, &new_instructions);
|
||||
|
||||
if (increase_priority && block.index == 0) {
|
||||
if (!block.instructions.empty() && block.instructions[0]->opcode == aco_opcode::s_setprio)
|
||||
block.instructions[0]->salu().imm = MAX2(block.instructions[0]->salu().imm, 2);
|
||||
else
|
||||
bld.sopp(aco_opcode::s_setprio, 2);
|
||||
}
|
||||
|
||||
for (unsigned i = 0; i < block.instructions.size(); i++) {
|
||||
Instruction* instr = block.instructions[i].get();
|
||||
new_instructions.push_back(std::move(block.instructions[i]));
|
||||
|
||||
if (instr->opcode == aco_opcode::s_setprio) {
|
||||
instr->salu().imm = MAX2(instr->salu().imm, 2);
|
||||
continue;
|
||||
}
|
||||
|
||||
bool end_of_export_sequence = instr->isEXP() && (i == block.instructions.size() - 1 ||
|
||||
!block.instructions[i + 1]->isEXP());
|
||||
if (!end_of_export_sequence)
|
||||
continue;
|
||||
|
||||
bool before_endpgm = false;
|
||||
if (i != block.instructions.size() - 1) {
|
||||
before_endpgm = block.instructions[i + 1]->opcode == aco_opcode::s_endpgm;
|
||||
} else {
|
||||
/* Does this fallthrough to a s_endpgm? */
|
||||
for (unsigned j = block.index + 1; j < program->blocks.size(); j++) {
|
||||
if (program->blocks[j].instructions.size() == 1 &&
|
||||
program->blocks[j].instructions[0]->opcode == aco_opcode::s_endpgm)
|
||||
before_endpgm = true;
|
||||
if (!program->blocks[j].instructions.empty())
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
bld.sopp(aco_opcode::s_setprio, 0);
|
||||
if (!before_endpgm)
|
||||
bld.sopk(aco_opcode::s_waitcnt_expcnt, Operand(sgpr_null, s1), 0);
|
||||
bld.sopp(aco_opcode::s_nop, 0);
|
||||
bld.sopp(aco_opcode::s_nop, 0);
|
||||
if (!before_endpgm)
|
||||
bld.sopp(aco_opcode::s_setprio, 2);
|
||||
}
|
||||
|
||||
block.instructions = std::move(new_instructions);
|
||||
}
|
||||
}
|
||||
|
||||
} /* end namespace */
|
||||
|
||||
void
|
||||
|
|
@ -1785,6 +1849,10 @@ insert_NOPs(Program* program)
|
|||
mitigate_hazards<NOP_ctx_gfx10, handle_instruction_gfx10, resolve_all_gfx10>(program);
|
||||
else
|
||||
mitigate_hazards<NOP_ctx_gfx6, handle_instruction_gfx6, resolve_all_gfx6>(program);
|
||||
|
||||
if (program->gfx_level == GFX11_5 && (program->stage.hw == AC_HW_NEXT_GEN_GEOMETRY_SHADER ||
|
||||
program->stage.hw == AC_HW_PIXEL_SHADER))
|
||||
required_export_priority(program);
|
||||
}
|
||||
|
||||
} // namespace aco
|
||||
|
|
|
|||
|
|
@ -12912,7 +12912,9 @@ select_vs_prolog(Program* program, const struct aco_vs_prolog_info* pinfo, ac_sh
|
|||
|
||||
block->instructions.reserve(16 + pinfo->num_attributes * 4);
|
||||
|
||||
bld.sopp(aco_opcode::s_setprio, 0x3u);
|
||||
/* Besides performance, the purpose of this is also for the FeatureRequiredExportPriority GFX11.5
|
||||
* issue. */
|
||||
bld.sopp(aco_opcode::s_setprio, 3);
|
||||
|
||||
uint32_t attrib_mask = BITFIELD_MASK(pinfo->num_attributes);
|
||||
bool has_nontrivial_divisors = pinfo->nontrivial_divisors;
|
||||
|
|
|
|||
|
|
@ -222,6 +222,7 @@ aco_compile_shader_part(const struct aco_compiler_options* options,
|
|||
program->debug.private_data = options->debug.private_data;
|
||||
|
||||
program->is_prolog = is_prolog;
|
||||
program->is_epilog = !is_prolog;
|
||||
|
||||
/* Instruction selection */
|
||||
select_shader_part(program.get(), pinfo, &config, options, info, args);
|
||||
|
|
|
|||
|
|
@ -2035,6 +2035,7 @@ public:
|
|||
bool has_pops_overlapped_waves_wait = false;
|
||||
bool has_color_exports = false;
|
||||
bool is_prolog = false;
|
||||
bool is_epilog = false;
|
||||
|
||||
std::vector<uint8_t> constant_data;
|
||||
Temp private_segment_buffer;
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue