mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-06 02:58:05 +02:00
aco: skip waitcnt between two vmem writing different halves
fossil-db (gfx1201):
Totals from 4 (0.01% of 79653) affected shaders:
Instrs: 41374 -> 41380 (+0.01%); split: -0.01%, +0.02%
CodeSize: 238912 -> 238924 (+0.01%); split: -0.01%, +0.01%
Latency: 706714 -> 706410 (-0.04%)
InvThroughput: 352269 -> 352118 (-0.04%)
VClause: 803 -> 798 (-0.62%)

fossil-db (navi31):
Totals from 0 (0.00% of 79653) affected shaders:

fossil-db (navi21):
Totals from 0 (0.00% of 79653) affected shaders:

Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Closes: https://gitlab.freedesktop.org/mesa/mesa/-/issues/13028
Reviewed-by: Georg Lehmann <dadschoorse@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/34978>
This commit is contained in:
parent
9a38ad3ca7
commit
9649deb50e
2 changed files with 87 additions and 2 deletions
|
|
@ -350,7 +350,7 @@ check_instr(wait_ctx& ctx, wait_imm& wait, Instruction* instr)
|
|||
* VMEM loads do not write the same lanes. Since GFX11, we track VMEM operations on the
|
||||
* linear CFG, so this is difficult */
|
||||
uint8_t vmem_type = get_vmem_type(ctx.gfx_level, instr);
|
||||
if (vmem_type && ctx.gfx_level < GFX12) {
|
||||
if (vmem_type) {
|
||||
wait_event event = get_vmem_event(ctx, instr, vmem_type);
|
||||
wait_type type = (wait_type)(ffs(ctx.info->get_counters_for_event(event)) - 1);
|
||||
|
||||
|
|
@ -359,7 +359,13 @@ check_instr(wait_ctx& ctx, wait_imm& wait, Instruction* instr)
|
|||
bool type_matches = type != wait_type_vm || (it->second.vmem_types == vmem_type &&
|
||||
util_bitcount(vmem_type) == 1);
|
||||
|
||||
if (event_matches && type_matches)
|
||||
bool different_halves = false;
|
||||
if (event == event_vmem && event_matches) {
|
||||
uint32_t mask = (get_vmem_mask(ctx, instr) >> (j * 2)) & 0x3;
|
||||
different_halves = !(mask & it->second.vm_mask);
|
||||
}
|
||||
|
||||
if ((event_matches && type_matches && ctx.gfx_level < GFX12) || different_halves)
|
||||
reg_imm[type] = wait_imm::unset_counter;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -612,3 +612,82 @@ BEGIN_TEST(insert_waitcnt.vmem_ds)
|
|||
|
||||
finish_waitcnt_test();
|
||||
END_TEST
|
||||
|
||||
/* Wait-count insertion test for write-after-write between two VMEM
 * instructions that both define part of the same VGPR (v4), on GFX12.
 *
 * Each p_unit_test case emits a pair of loads; the `//>>` / `//!` lines give
 * the expected output, including whether a wait instruction must appear
 * between the two loads. */
BEGIN_TEST(insert_waitcnt.waw.vmem_different_halves)
|
||||
if (!setup_cs(NULL, GFX12))
|
||||
return;
|
||||
|
||||
/* 16-bit definition printed as v[4][0:16] in the expected output. */
Definition def_v4_lo(PhysReg(260), v2b);
|
||||
/* 16-bit definition printed as v[4][16:32] in the expected output. */
Definition def_v4_hi(PhysReg(260).advance(2), v2b);
|
||||
/* Printed as v[0] in the expected output. */
Operand op_v0(PhysReg(256), v1);
|
||||
/* Printed as s[0-3] in the expected output. */
Operand desc_s4(PhysReg(0), s4);
|
||||
/* Printed as s[8-15] in the expected output. */
Operand desc_s8(PhysReg(8), s8);
|
||||
|
||||
/* Case 0: buffer load into the low half, then into the high half.
 * No wait is expected between the two loads. */
//>> p_unit_test 0
|
||||
//! v2b: %0:v[4][0:16] = buffer_load_short_d16 %0:s[0-3], %0:v[0], 0
|
||||
//! v2b: %0:v[4][16:32] = buffer_load_short_d16_hi %0:s[0-3], %0:v[0], 0
|
||||
bld.pseudo(aco_opcode::p_unit_test, Operand::zero());
|
||||
bld.mubuf(aco_opcode::buffer_load_short_d16, def_v4_lo, desc_s4, op_v0, Operand::zero(), 0,
|
||||
false);
|
||||
bld.mubuf(aco_opcode::buffer_load_short_d16_hi, def_v4_hi, desc_s4, op_v0, Operand::zero(), 0,
|
||||
false);
|
||||
|
||||
/* Case 1: same as case 0 with the order swapped (high half first).
 * No wait is expected between the two loads. */
//>> p_unit_test 1
|
||||
//! v2b: %0:v[4][16:32] = buffer_load_short_d16_hi %0:s[0-3], %0:v[0], 0
|
||||
//! v2b: %0:v[4][0:16] = buffer_load_short_d16 %0:s[0-3], %0:v[0], 0
|
||||
bld.reset(program->create_and_insert_block());
|
||||
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(1));
|
||||
bld.mubuf(aco_opcode::buffer_load_short_d16_hi, def_v4_hi, desc_s4, op_v0, Operand::zero(), 0,
|
||||
false);
|
||||
bld.mubuf(aco_opcode::buffer_load_short_d16, def_v4_lo, desc_s4, op_v0, Operand::zero(), 0,
|
||||
false);
|
||||
|
||||
/* Case 2: two buffer loads into the same (low) half.
 * An s_wait_loadcnt is expected between them. */
//>> p_unit_test 2
|
||||
//! v2b: %0:v[4][0:16] = buffer_load_short_d16 %0:s[0-3], %0:v[0], 0
|
||||
//! s_wait_loadcnt imm:0
|
||||
//! v2b: %0:v[4][0:16] = buffer_load_short_d16 %0:s[0-3], %0:v[0], 0
|
||||
bld.reset(program->create_and_insert_block());
|
||||
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(2));
|
||||
bld.mubuf(aco_opcode::buffer_load_short_d16, def_v4_lo, desc_s4, op_v0, Operand::zero(), 0,
|
||||
false);
|
||||
bld.mubuf(aco_opcode::buffer_load_short_d16, def_v4_lo, desc_s4, op_v0, Operand::zero(), 0,
|
||||
false);
|
||||
|
||||
/* Case 3: two buffer loads into the same (high) half.
 * An s_wait_loadcnt is expected between them. */
//>> p_unit_test 3
|
||||
//! v2b: %0:v[4][16:32] = buffer_load_short_d16_hi %0:s[0-3], %0:v[0], 0
|
||||
//! s_wait_loadcnt imm:0
|
||||
//! v2b: %0:v[4][16:32] = buffer_load_short_d16_hi %0:s[0-3], %0:v[0], 0
|
||||
bld.reset(program->create_and_insert_block());
|
||||
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(3));
|
||||
bld.mubuf(aco_opcode::buffer_load_short_d16_hi, def_v4_hi, desc_s4, op_v0, Operand::zero(), 0,
|
||||
false);
|
||||
bld.mubuf(aco_opcode::buffer_load_short_d16_hi, def_v4_hi, desc_s4, op_v0, Operand::zero(), 0,
|
||||
false);
|
||||
|
||||
/* Case 4: d16 image_sample into the low half, then a buffer load into the
 * high half. Although the halves differ, an s_wait_samplecnt is still
 * expected between them.
 * NOTE(review): presumably because the two instructions are different kinds
 * of VMEM operation - confirm against check_instr(). */
//>> p_unit_test 4
|
||||
//! v2b: %0:v[4][0:16] = image_sample %0:s[8-15], %0:s[0-3], v1: undef, %0:v[0] 1d d16
|
||||
//! s_wait_samplecnt imm:0
|
||||
//! v2b: %0:v[4][16:32] = buffer_load_short_d16_hi %0:s[0-3], %0:v[0], 0
|
||||
bld.reset(program->create_and_insert_block());
|
||||
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(4));
|
||||
Instruction* instr =
|
||||
bld.mimg(aco_opcode::image_sample, def_v4_lo, desc_s8, desc_s4, Operand(v1), op_v0);
|
||||
instr->mimg().dmask = 0x1;
|
||||
instr->mimg().d16 = true;
|
||||
bld.mubuf(aco_opcode::buffer_load_short_d16_hi, def_v4_hi, desc_s4, op_v0, Operand::zero(), 0,
|
||||
false);
|
||||
|
||||
/* Case 5: reverse of case 4 - buffer load into the high half, then a d16
 * image_sample into the low half. An s_wait_loadcnt is expected between
 * them. */
//>> p_unit_test 5
|
||||
//! v2b: %0:v[4][16:32] = buffer_load_short_d16_hi %0:s[0-3], %0:v[0], 0
|
||||
//! s_wait_loadcnt imm:0
|
||||
//! v2b: %0:v[4][0:16] = image_sample %0:s[8-15], %0:s[0-3], v1: undef, %0:v[0] 1d d16
|
||||
bld.reset(program->create_and_insert_block());
|
||||
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(5));
|
||||
bld.mubuf(aco_opcode::buffer_load_short_d16_hi, def_v4_hi, desc_s4, op_v0, Operand::zero(), 0,
|
||||
false);
|
||||
instr = bld.mimg(aco_opcode::image_sample, def_v4_lo, desc_s8, desc_s4, Operand(v1), op_v0);
|
||||
instr->mimg().dmask = 0x1;
|
||||
instr->mimg().d16 = true;
|
||||
|
||||
finish_waitcnt_test();
|
||||
END_TEST
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue