aco/assembler: Find loop exits using the successor's loop nest depth

Previously, we just used the next block after a loop that
has a back-edge. This assumes that loop-exit blocks can
only be removed when falling through to the next block,
when in fact it can also be a jump to somewhere else,
in future even to some block before the actual loop.

12 (0.02% of 79395) affected shaders.

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/32477>
This commit is contained in:
Daniel Schürmann 2024-11-27 14:43:20 +01:00 committed by Marge Bot
parent 29c63de062
commit de1e38e214

View file

@ -37,6 +37,7 @@ struct asm_context {
std::map<unsigned, constaddr_info> resumeaddrs;
std::vector<struct aco_symbol>* symbols;
uint32_t loop_header = -1u;
uint32_t loop_exit = 0u;
const int16_t* opcode;
// TODO: keep track of branch instructions referring blocks
// and, when emitting the block, correct the offset in instr
@ -1675,13 +1676,14 @@ fix_constaddrs(asm_context& ctx, std::vector<uint32_t>& out)
void
align_block(asm_context& ctx, std::vector<uint32_t>& code, Block& block)
{
/* Blocks with block_kind_loop_exit might be eliminated after jump threading, so we instead find
* loop exits using loop_nest_depth.
*/
if (ctx.loop_header != -1u && !block.linear_preds.empty() &&
/* Align the previous loop. */
if (ctx.loop_header != -1u &&
block.loop_nest_depth < ctx.program->blocks[ctx.loop_header].loop_nest_depth) {
assert(ctx.loop_exit != -1u);
Block& loop_header = ctx.program->blocks[ctx.loop_header];
Block& loop_exit = ctx.program->blocks[ctx.loop_exit];
ctx.loop_header = -1u;
ctx.loop_exit = -1u;
std::vector<uint32_t> nops;
const unsigned loop_num_cl = DIV_ROUND_UP(block.offset - loop_header.offset, 16);
@ -1700,9 +1702,14 @@ align_block(asm_context& ctx, std::vector<uint32_t>& code, Block& block)
emit_instruction(ctx, nops, instr);
insert_code(ctx, code, loop_header.offset, nops.size(), nops.data());
/* Change prefetch mode back to default (0x3). */
bld.reset(&block.instructions, block.instructions.begin());
bld.sopp(aco_opcode::s_inst_prefetch, 0x3);
/* Change prefetch mode back to default (0x3) at the loop exit. */
bld.reset(&loop_exit.instructions, loop_exit.instructions.begin());
instr = bld.sopp(aco_opcode::s_inst_prefetch, 0x3);
if (ctx.loop_exit < block.index) {
nops.clear();
emit_instruction(ctx, nops, instr);
insert_code(ctx, code, loop_exit.offset, nops.size(), nops.data());
}
}
const unsigned loop_start_cl = loop_header.offset >> 4;
@ -1726,8 +1733,22 @@ align_block(asm_context& ctx, std::vector<uint32_t>& code, Block& block)
* to not break the alignment of inner loops by handling outer loops.
* Also ignore loops without back-edge.
*/
if (block.linear_preds.size() > 1)
if (block.linear_preds.size() > 1) {
ctx.loop_header = block.index;
ctx.loop_exit = -1u;
}
}
/* Blocks with block_kind_loop_exit might be eliminated after jump threading,
* so we instead find loop exits using the successors when in loop_nest_depth.
* This works, because control flow always re-converges after loops.
*/
if (ctx.loop_header != -1u && ctx.loop_exit == -1u) {
for (uint32_t succ_idx : block.linear_succs) {
Block& succ = ctx.program->blocks[succ_idx];
if (succ.loop_nest_depth < ctx.program->blocks[ctx.loop_header].loop_nest_depth)
ctx.loop_exit = succ_idx;
}
}
/* align resume shaders with cache line */