mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-05 11:48:06 +02:00
aco: only add/subtract low bits of program addresses
fossil-db (Sienna Cichlid):
Totals from 4007 (2.47% of 162293) affected shaders:
Instrs: 3733239 -> 3728018 (-0.14%)
CodeSize: 20770340 -> 20749456 (-0.10%)
Latency: 46883958 -> 46872764 (-0.02%); split: -0.02%, +0.00%
InvThroughput: 10550392 -> 10548698 (-0.02%); split: -0.02%, +0.00%

Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/16460>
This commit is contained in:
parent
69d1f4186a
commit
4513cb8d41
3 changed files with 11 additions and 21 deletions
|
|
@ -903,7 +903,7 @@ emit_long_jump(asm_context& ctx, SOPP_instruction* branch, bool backwards,
|
|||
case aco_opcode::s_cbranch_execnz: inv = aco_opcode::s_cbranch_execz; break;
|
||||
default: unreachable("Unhandled long jump.");
|
||||
}
|
||||
instr.reset(bld.sopp(inv, -1, 7));
|
||||
instr.reset(bld.sopp(inv, -1, 6));
|
||||
emit_instruction(ctx, out, instr.get());
|
||||
}
|
||||
|
||||
|
|
@ -916,10 +916,7 @@ emit_long_jump(asm_context& ctx, SOPP_instruction* branch, bool backwards,
|
|||
emit_instruction(ctx, out, instr.get());
|
||||
branch->pass_flags = out.size();
|
||||
|
||||
instr.reset(bld.sop2(aco_opcode::s_addc_u32, def_tmp_hi, op_tmp_hi,
|
||||
Operand::c32(backwards ? UINT32_MAX : 0u))
|
||||
.instr);
|
||||
emit_instruction(ctx, out, instr.get());
|
||||
/* s_addc_u32 for high 32 bits not needed because the program is in a 32-bit VA range */
|
||||
|
||||
/* restore SCC and clear the LSB of the new PC */
|
||||
instr.reset(bld.sopc(aco_opcode::s_bitcmp1_b32, def_tmp_lo, op_tmp_lo, Operand::zero()).instr);
|
||||
|
|
|
|||
|
|
@ -2100,8 +2100,7 @@ lower_to_hw_instr(Program* program)
|
|||
bld.sop1(aco_opcode::p_constaddr_getpc, instr->definitions[0], Operand::c32(id));
|
||||
bld.sop2(aco_opcode::p_constaddr_addlo, Definition(reg, s1), bld.def(s1, scc),
|
||||
Operand(reg, s1), instr->operands[0], Operand::c32(id));
|
||||
bld.sop2(aco_opcode::s_addc_u32, Definition(reg.advance(4), s1), bld.def(s1, scc),
|
||||
Operand(reg.advance(4), s1), Operand::zero(), Operand(scc, s1));
|
||||
/* s_addc_u32 not needed because the program is in a 32-bit VA range */
|
||||
break;
|
||||
}
|
||||
case aco_opcode::p_extract: {
|
||||
|
|
|
|||
|
|
@ -65,8 +65,7 @@ BEGIN_TEST(assembler.long_jump.unconditional_forwards)
|
|||
|
||||
//!BB0:
|
||||
//! s_getpc_b64 s[0:1] ; be801f00
|
||||
//! s_addc_u32 s0, s0, 0x20018 ; 8200ff00 00020018
|
||||
//! s_addc_u32 s1, s1, 0 ; 82018001
|
||||
//! s_addc_u32 s0, s0, 0x20014 ; 8200ff00 00020014
|
||||
//! s_bitcmp1_b32 s0, 0 ; bf0d8000
|
||||
//! s_bitset0_b32 s0, 0 ; be801b80
|
||||
//! s_setpc_b64 s[0:1] ; be802000
|
||||
|
|
@ -94,10 +93,9 @@ BEGIN_TEST(assembler.long_jump.conditional_forwards)
|
|||
return;
|
||||
|
||||
//! BB0:
|
||||
//! s_cbranch_scc1 BB1 ; bf850007
|
||||
//! s_cbranch_scc1 BB1 ; bf850006
|
||||
//! s_getpc_b64 s[0:1] ; be801f00
|
||||
//! s_addc_u32 s0, s0, 0x20018 ; 8200ff00 00020018
|
||||
//! s_addc_u32 s1, s1, 0 ; 82018001
|
||||
//! s_addc_u32 s0, s0, 0x20014 ; 8200ff00 00020014
|
||||
//! s_bitcmp1_b32 s0, 0 ; bf0d8000
|
||||
//! s_bitset0_b32 s0, 0 ; be801b80
|
||||
//! s_setpc_b64 s[0:1] ; be802000
|
||||
|
|
@ -134,7 +132,6 @@ BEGIN_TEST(assembler.long_jump.unconditional_backwards)
|
|||
|
||||
//! s_getpc_b64 s[0:1] ; be801f00
|
||||
//! s_addc_u32 s0, s0, 0xfffdfffc ; 8200ff00 fffdfffc
|
||||
//! s_addc_u32 s1, s1, -1 ; 8201c101
|
||||
//! s_bitcmp1_b32 s0, 0 ; bf0d8000
|
||||
//! s_bitset0_b32 s0, 0 ; be801b80
|
||||
//! s_setpc_b64 s[0:1] ; be802000
|
||||
|
|
@ -160,10 +157,9 @@ BEGIN_TEST(assembler.long_jump.conditional_backwards)
|
|||
for (unsigned i = 0; i < INT16_MAX + 1; i++)
|
||||
bld.sopp(aco_opcode::s_nop, -1, 0);
|
||||
|
||||
//! s_cbranch_execz BB1 ; bf880007
|
||||
//! s_cbranch_execz BB1 ; bf880006
|
||||
//! s_getpc_b64 s[0:1] ; be801f00
|
||||
//! s_addc_u32 s0, s0, 0xfffdfff8 ; 8200ff00 fffdfff8
|
||||
//! s_addc_u32 s1, s1, -1 ; 8201c101
|
||||
//! s_bitcmp1_b32 s0, 0 ; bf0d8000
|
||||
//! s_bitset0_b32 s0, 0 ; be801b80
|
||||
//! s_setpc_b64 s[0:1] ; be802000
|
||||
|
|
@ -188,7 +184,7 @@ BEGIN_TEST(assembler.long_jump.3f)
|
|||
//! s_nop 0 ; bf800000
|
||||
bld.sopp(aco_opcode::s_branch, Definition(PhysReg(0), s2), 1);
|
||||
|
||||
for (unsigned i = 0; i < 0x3f - 7; i++) // a unconditional long jump is 7 dwords
|
||||
for (unsigned i = 0; i < 0x3f - 6; i++) // a unconditional long jump is 6 dwords
|
||||
bld.vop1(aco_opcode::v_nop);
|
||||
bld.sopp(aco_opcode::s_branch, Definition(PhysReg(0), s2), 2);
|
||||
|
||||
|
|
@ -219,7 +215,7 @@ BEGIN_TEST(assembler.long_jump.constaddr)
|
|||
bld.reset(program->create_and_insert_block());
|
||||
|
||||
//>> s_getpc_b64 s[0:1] ; be801f00
|
||||
//! s_add_u32 s0, s0, 0xe0 ; 8000ff00 000000e0
|
||||
//! s_add_u32 s0, s0, 0xe4 ; 8000ff00 000000e4
|
||||
bld.sop1(aco_opcode::p_constaddr_getpc, Definition(PhysReg(0), s2), Operand::zero());
|
||||
bld.sop2(aco_opcode::p_constaddr_addlo, Definition(PhysReg(0), s1), bld.def(s1, scc),
|
||||
Operand(PhysReg(0), s1), Operand::zero(), Operand::zero());
|
||||
|
|
@ -302,13 +298,11 @@ BEGIN_TEST(assembler.p_constaddr)
|
|||
dst1.setFixed(PhysReg(2));
|
||||
|
||||
//>> s_getpc_b64 s[0:1] ; be801c00
|
||||
//! s_add_u32 s0, s0, 32 ; 8000ff00 00000020
|
||||
//! s_addc_u32 s1, s1, 0 ; 82018001
|
||||
//! s_add_u32 s0, s0, 24 ; 8000ff00 00000018
|
||||
bld.pseudo(aco_opcode::p_constaddr, dst0, Operand::zero());
|
||||
|
||||
//! s_getpc_b64 s[2:3] ; be821c00
|
||||
//! s_add_u32 s2, s2, 48 ; 8002ff02 00000030
|
||||
//! s_addc_u32 s3, s3, 0 ; 82038003
|
||||
//! s_add_u32 s2, s2, 44 ; 8002ff02 0000002c
|
||||
bld.pseudo(aco_opcode::p_constaddr, dst1, Operand::c32(32));
|
||||
|
||||
aco::lower_to_hw_instr(program.get());
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue