mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-24 19:40:10 +01:00
aco: fix long-jump version of discard early exit
It isn't safe to modify the exec mask before the discard block, and the exec definition on the branch interferes with GFX11 NOP insertion. Just use s[0:1] as the long-jump temporary instead, since the discard early exit block doesn't use SGPRs.

Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/18125>
This commit is contained in:
parent
1a250eeae3
commit
fb13ed6ff0
4 changed files with 50 additions and 8 deletions
|
|
@ -902,10 +902,18 @@ emit_long_jump(asm_context& ctx, SOPP_instruction* branch, bool backwards,
|
|||
{
|
||||
Builder bld(ctx.program);
|
||||
|
||||
Definition def_tmp_lo(branch->definitions[0].physReg(), s1);
|
||||
Operand op_tmp_lo(branch->definitions[0].physReg(), s1);
|
||||
Definition def_tmp_hi(branch->definitions[0].physReg().advance(4), s1);
|
||||
Operand op_tmp_hi(branch->definitions[0].physReg().advance(4), s1);
|
||||
Definition def;
|
||||
if (branch->definitions.empty()) {
|
||||
assert(ctx.program->blocks[branch->block].kind & block_kind_discard_early_exit);
|
||||
def = Definition(PhysReg(0), s2); /* The discard early exit block doesn't use SGPRs. */
|
||||
} else {
|
||||
def = branch->definitions[0];
|
||||
}
|
||||
|
||||
Definition def_tmp_lo(def.physReg(), s1);
|
||||
Operand op_tmp_lo(def.physReg(), s1);
|
||||
Definition def_tmp_hi(def.physReg().advance(4), s1);
|
||||
Operand op_tmp_hi(def.physReg().advance(4), s1);
|
||||
|
||||
aco_ptr<Instruction> instr;
|
||||
|
||||
|
|
@ -926,7 +934,7 @@ emit_long_jump(asm_context& ctx, SOPP_instruction* branch, bool backwards,
|
|||
}
|
||||
|
||||
/* create the new PC and stash SCC in the LSB */
|
||||
instr.reset(bld.sop1(aco_opcode::s_getpc_b64, branch->definitions[0]).instr);
|
||||
instr.reset(bld.sop1(aco_opcode::s_getpc_b64, def).instr);
|
||||
emit_instruction(ctx, out, instr.get());
|
||||
|
||||
instr.reset(
|
||||
|
|
@ -944,7 +952,7 @@ emit_long_jump(asm_context& ctx, SOPP_instruction* branch, bool backwards,
|
|||
|
||||
/* create the s_setpc_b64 to jump */
|
||||
instr.reset(
|
||||
bld.sop1(aco_opcode::s_setpc_b64, Operand(branch->definitions[0].physReg(), s2)).instr);
|
||||
bld.sop1(aco_opcode::s_setpc_b64, Operand(def.physReg(), s2)).instr);
|
||||
emit_instruction(ctx, out, instr.get());
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -1836,6 +1836,7 @@ enum block_kind {
|
|||
block_kind_branch = 1 << 8,
|
||||
block_kind_merge = 1 << 9,
|
||||
block_kind_invert = 1 << 10,
|
||||
block_kind_discard_early_exit = 1 << 11,
|
||||
block_kind_uses_discard = 1 << 12,
|
||||
block_kind_needs_lowering = 1 << 13,
|
||||
block_kind_export_end = 1 << 15,
|
||||
|
|
|
|||
|
|
@ -2121,6 +2121,7 @@ lower_to_hw_instr(Program* program)
|
|||
|
||||
if (!discard_block) {
|
||||
discard_block = program->create_and_insert_block();
|
||||
discard_block->kind = block_kind_discard_early_exit;
|
||||
block = &program->blocks[block_idx];
|
||||
|
||||
bld.reset(discard_block);
|
||||
|
|
@ -2133,8 +2134,7 @@ lower_to_hw_instr(Program* program)
|
|||
}
|
||||
|
||||
assert(instr->operands[0].physReg() == scc);
|
||||
bld.sopp(aco_opcode::s_cbranch_scc0, Definition(exec, s2), instr->operands[0],
|
||||
discard_block->index);
|
||||
bld.sopp(aco_opcode::s_cbranch_scc0, instr->operands[0], discard_block->index);
|
||||
|
||||
discard_block->linear_preds.push_back(block->index);
|
||||
block->linear_succs.push_back(discard_block->index);
|
||||
|
|
|
|||
|
|
@ -226,6 +226,39 @@ BEGIN_TEST(assembler.long_jump.constaddr)
|
|||
finish_assembler_test();
|
||||
END_TEST
|
||||
|
||||
/* Assembler test: a conditional branch that carries no definition and targets
 * a block marked block_kind_discard_early_exit, with a very large block in
 * between. The expected output below shows the branch being emitted as an
 * inverted short branch over an s_getpc_b64/s_setpc_b64 sequence that uses
 * s[0:1] as its temporary.
 * NOTE(review): the `//!` lines appear to be expected-disassembly patterns
 * consumed by the test framework -- confirm before editing them.
 */
BEGIN_TEST(assembler.long_jump.discard_early_exit)
|
||||
if (!setup_cs(NULL, (amd_gfx_level)GFX10))
|
||||
return;
|
||||
|
||||
//! BB0:
|
||||
//! s_cbranch_scc1 BB1 ; bf850006
|
||||
//! s_getpc_b64 s[0:1] ; be801f00
|
||||
//! s_addc_u32 s0, s0, 0x20014 ; 8200ff00 00020014
|
||||
//! s_bitcmp1_b32 s0, 0 ; bf0d8000
|
||||
//! s_bitset0_b32 s0, 0 ; be801b80
|
||||
//! s_setpc_b64 s[0:1] ; be802000
|
||||
/* Branch to block index 2; no Definition is passed, so the branch has no temporary register of its own. */
bld.sopp(aco_opcode::s_cbranch_scc0, 2);
|
||||
|
||||
bld.reset(program->create_and_insert_block());
|
||||
|
||||
//! BB1:
|
||||
//! s_nop 1 ; bf800001
|
||||
//!(then repeated 32766 times)
|
||||
//! s_endpgm ; bf810000
/* Fill BB1 with INT16_MAX s_nop instructions; presumably this pushes BB2 beyond short-branch range -- TODO confirm. */
for (unsigned i = 0; i < INT16_MAX; i++)
|
||||
bld.sopp(aco_opcode::s_nop, -1, 1);
|
||||
|
||||
//! BB2:
|
||||
//! s_endpgm ; bf810000
|
||||
bld.reset(program->create_and_insert_block());
|
||||
|
||||
/* Record block 0 as a linear predecessor of both later blocks. */
program->blocks[1].linear_preds.push_back(0u);
|
||||
program->blocks[2].linear_preds.push_back(0u);
|
||||
/* Mark the branch target (block 2) as the discard early exit block. */
program->blocks[2].kind = block_kind_discard_early_exit;
|
||||
|
||||
finish_assembler_test();
|
||||
END_TEST
|
||||
|
||||
BEGIN_TEST(assembler.v_add3)
|
||||
for (unsigned i = GFX9; i <= GFX10; i++) {
|
||||
if (!setup_cs(NULL, (amd_gfx_level)i))
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue