aco/gfx12: sign-extend s_getpc_b64

Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Georg Lehmann <dadschoorse@gmail.com>
Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/29330>
This commit is contained in:
Rhys Perry 2024-05-16 17:10:57 +01:00 committed by Marge Bot
parent ae18c88409
commit e79a8219d2
3 changed files with 38 additions and 24 deletions

View file

@@ -1537,7 +1537,8 @@ emit_long_jump(asm_context& ctx, SALU_instruction* branch, bool backwards,
case aco_opcode::s_cbranch_execnz: inv = aco_opcode::s_cbranch_execz; break;
default: unreachable("Unhandled long jump.");
}
instr.reset(bld.sopp(inv, 6));
unsigned size = ctx.gfx_level >= GFX12 ? 7 : 6;
instr.reset(bld.sopp(inv, size));
emit_sopp_instruction(ctx, out, instr.get(), true);
}
@@ -1545,6 +1546,11 @@ emit_long_jump(asm_context& ctx, SALU_instruction* branch, bool backwards,
instr.reset(bld.sop1(aco_opcode::s_getpc_b64, def).instr);
emit_instruction(ctx, out, instr.get());
if (ctx.gfx_level >= GFX12) {
instr.reset(bld.sop1(aco_opcode::s_sext_i32_i16, def_tmp_hi, op_tmp_hi).instr);
emit_instruction(ctx, out, instr.get());
}
instr.reset(
bld.sop2(aco_opcode::s_addc_u32, def_tmp_lo, op_tmp_lo, Operand::literal32(0)).instr);
emit_instruction(ctx, out, instr.get());

View file

@@ -2402,6 +2402,8 @@ lower_to_hw_instr(Program* program)
unsigned id = instr->definitions[0].tempId();
PhysReg reg = instr->definitions[0].physReg();
bld.sop1(aco_opcode::p_constaddr_getpc, instr->definitions[0], Operand::c32(id));
if (ctx.program->gfx_level >= GFX12)
bld.sop1(aco_opcode::s_sext_i32_i16, Definition(reg.advance(4), s1), Operand(reg.advance(4), s1));
bld.sop2(aco_opcode::p_constaddr_addlo, Definition(reg, s1), bld.def(s1, scc),
Operand(reg, s1), instr->operands[0], Operand::c32(id));
/* s_addc_u32 not needed because the program is in a 32-bit VA range */
@@ -2424,6 +2426,8 @@ lower_to_hw_instr(Program* program)
unsigned id = instr->definitions[0].tempId();
PhysReg reg = instr->definitions[0].physReg();
bld.sop1(aco_opcode::p_resumeaddr_getpc, instr->definitions[0], Operand::c32(id));
if (ctx.program->gfx_level >= GFX12)
bld.sop1(aco_opcode::s_sext_i32_i16, Definition(reg.advance(4), s1), Operand(reg.advance(4), s1));
bld.sop2(aco_opcode::p_resumeaddr_addlo, Definition(reg, s1), bld.def(s1, scc),
Operand(reg, s1), Operand::c32(resume_block_idx), Operand::c32(id));
/* s_addc_u32 not needed because the program is in a 32-bit VA range */

View file

@@ -85,35 +85,39 @@ BEGIN_TEST(assembler.long_jump.unconditional_forwards)
END_TEST
BEGIN_TEST(assembler.long_jump.conditional_forwards)
if (!setup_cs(NULL, (amd_gfx_level)GFX10))
return;
for (amd_gfx_level gfx : filter_gfx_levels({GFX10, GFX12})) {
if (!setup_cs(NULL, gfx))
continue;
//! BB0:
//! s_cbranch_scc1 BB1 ; bf850006
//! s_getpc_b64 s[0:1] ; be801f00
//! s_addc_u32 s0, s0, 0x20014 ; 8200ff00 00020014
//! s_bitcmp1_b32 s0, 0 ; bf0d8000
//! s_bitset0_b32 s0, 0 ; be801b80
//! s_setpc_b64 s[0:1] ; be802000
bld.sopp(aco_opcode::s_cbranch_scc0, Definition(PhysReg(0), s2), 2);
//! BB0:
//! s_cbranch_scc1 BB1 ; $_
//! s_getpc_b64 s[0:1] ; $_
//~gfx12! s_sext_i32_i16 s1, s1 ; $_
//~gfx10! s_addc_u32 s0, s0, 0x20014 ; $_ $_
//~gfx12! s_add_co_ci_u32 s0, s0, 0x20014 ; $_ $_
//! s_bitcmp1_b32 s0, 0 ; $_
//! s_bitset0_b32 s0, 0 ; $_
//! s_setpc_b64 s[0:1] ; $_
bld.sopp(aco_opcode::s_cbranch_scc0, Definition(PhysReg(0), s2), 2);
bld.reset(program->create_and_insert_block());
bld.reset(program->create_and_insert_block());
//! BB1:
//! s_nop 0 ; bf800000
//!(then repeated 32767 times)
for (unsigned i = 0; i < INT16_MAX + 1; i++)
bld.sopp(aco_opcode::s_nop, 0);
//! BB1:
//! s_nop 0 ; bf800000
//!(then repeated 32767 times)
for (unsigned i = 0; i < INT16_MAX + 1; i++)
bld.sopp(aco_opcode::s_nop, 0);
//! BB2:
//! s_endpgm ; bf810000
bld.reset(program->create_and_insert_block());
//! BB2:
//! s_endpgm ; $_
bld.reset(program->create_and_insert_block());
program->blocks[1].linear_preds.push_back(0u);
program->blocks[2].linear_preds.push_back(0u);
program->blocks[2].linear_preds.push_back(1u);
program->blocks[1].linear_preds.push_back(0u);
program->blocks[2].linear_preds.push_back(0u);
program->blocks[2].linear_preds.push_back(1u);
finish_assembler_test();
finish_assembler_test();
}
END_TEST
BEGIN_TEST(assembler.long_jump.unconditional_backwards)