mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-03 03:28:09 +02:00
aco: sometimes join linear wait entries on logical edges
fossil-db (gfx1201):
Totals from 1303 (1.64% of 79653) affected shaders:
Instrs: 6920949 -> 6917692 (-0.05%); split: -0.06%, +0.01%
CodeSize: 37112404 -> 37095728 (-0.04%); split: -0.05%, +0.01%
Latency: 70471343 -> 70365986 (-0.15%); split: -0.15%, +0.00%
InvThroughput: 11515673 -> 11504666 (-0.10%); split: -0.10%, +0.01%

fossil-db (navi31):
Totals from 1293 (1.62% of 79653) affected shaders:
Instrs: 6500186 -> 6496761 (-0.05%); split: -0.06%, +0.01%
CodeSize: 34562712 -> 34549236 (-0.04%); split: -0.04%, +0.01%
Latency: 68604746 -> 68666532 (+0.09%); split: -0.15%, +0.24%
InvThroughput: 11276591 -> 11284914 (+0.07%); split: -0.10%, +0.17%

fossil-db (navi21):
Totals from 811 (1.02% of 79653) affected shaders:
Instrs: 4110953 -> 4108788 (-0.05%); split: -0.05%, +0.00%
CodeSize: 22955984 -> 22948064 (-0.03%); split: -0.03%, +0.00%
Latency: 35070231 -> 35064448 (-0.02%); split: -0.02%, +0.00%
InvThroughput: 6945610 -> 6945053 (-0.01%); split: -0.01%, +0.00%

Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Georg Lehmann <dadschoorse@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/34978>
This commit is contained in:
parent
c1f8537131
commit
1088ac49db
2 changed files with 60 additions and 4 deletions
|
|
@ -199,7 +199,7 @@ struct wait_ctx {
|
|||
: program(program_), gfx_level(program_->gfx_level), info(info_)
|
||||
{}
|
||||
|
||||
bool join(const wait_ctx* other, bool logical)
|
||||
bool join(const wait_ctx* other, bool logical, bool logical_merge)
|
||||
{
|
||||
bool changed = (other->pending_flat_lgkm && !pending_flat_lgkm) ||
|
||||
(other->pending_flat_vm && !pending_flat_vm) || (~nonzero & other->nonzero);
|
||||
|
|
@ -212,7 +212,7 @@ struct wait_ctx {
|
|||
using iterator = std::map<PhysReg, wait_entry>::iterator;
|
||||
|
||||
for (const auto& entry : other->gpr_map) {
|
||||
if (entry.second.logical != logical) {
|
||||
if (logical_merge ? !logical : (entry.second.logical != logical)) {
|
||||
if (logical) {
|
||||
iterator it = gpr_map.find(entry.first);
|
||||
if (it != gpr_map.end()) {
|
||||
|
|
@ -917,11 +917,24 @@ insert_waitcnt(Program* program)
|
|||
continue;
|
||||
}
|
||||
|
||||
/* Sometimes the counter for an entry is incremented or removed on all logical predecessors,
|
||||
* so it might be better to join entries using the logical predecessors instead of the linear
|
||||
* ones.
|
||||
*/
|
||||
bool logical_merge =
|
||||
current.logical_preds.size() > 1 &&
|
||||
std::any_of(current.linear_preds.begin(), current.linear_preds.end(),
|
||||
[&](unsigned pred)
|
||||
{
|
||||
return std::find(current.logical_preds.begin(), current.logical_preds.end(),
|
||||
pred) == current.logical_preds.end();
|
||||
});
|
||||
|
||||
bool changed = false;
|
||||
for (unsigned b : current.linear_preds)
|
||||
changed |= ctx.join(&out_ctx[b], false);
|
||||
changed |= ctx.join(&out_ctx[b], false, logical_merge);
|
||||
for (unsigned b : current.logical_preds)
|
||||
changed |= ctx.join(&out_ctx[b], true);
|
||||
changed |= ctx.join(&out_ctx[b], true, logical_merge);
|
||||
|
||||
if (done[current.index] && !changed) {
|
||||
in_ctx[current.index] = std::move(ctx);
|
||||
|
|
|
|||
|
|
@ -756,3 +756,46 @@ BEGIN_TEST(insert_waitcnt.waw.vmem_different_lanes)
|
|||
finish_waitcnt_test();
|
||||
}
|
||||
END_TEST
|
||||
|
||||
/* Wait-count insertion across a divergent if/else where both branches issue a further
 * buffer load.
 *
 * A load into v[4] is emitted before the branch, and each branch then emits a load into
 * v[5]. After the merge, v[4] is read, and the expected output places a wait with a count
 * of 1 in front of that read (s_waitcnt vmcnt(1) on GFX10.3/GFX11, s_wait_loadcnt imm:1 on
 * GFX12).
 *
 * NOTE(review): presumably the count is 1 rather than 0 because the v[5] load issued on
 * every branch may still be outstanding when v[4] is needed -- confirm against the
 * join logic in insert_waitcnt.
 */
BEGIN_TEST(insert_waitcnt.divergent_branch.inc_counter)
|
||||
for (amd_gfx_level gfx : {GFX10_3, GFX11, GFX12}) {
|
||||
/* Skip this gfx level when setup_cs() reports failure. */
if (!setup_cs(NULL, gfx))
|
||||
continue;
|
||||

|
||||
/* Fixed physical registers used below: PhysReg(260)/(261) print as v[4]/v[5] and
 * PhysReg(256) as v[0] in the expected output; s[0-3] is the buffer descriptor.
 */
Definition def_v4(PhysReg(260), v1);
|
||||
Definition def_v5(PhysReg(261), v1);
|
||||
Operand op_v0(PhysReg(256), v1);
|
||||
Operand desc_s4(PhysReg(0), s4);
|
||||
Operand desc_s8(PhysReg(8), s8);
|
||||

|
||||
/* Load into v[4] before the divergent branch. */
//>> v1: %0:v[4] = buffer_load_dword %0:s[0-3], %0:v[0], 0
|
||||
bld.mubuf(aco_opcode::buffer_load_dword, def_v4, desc_s4, op_v0, Operand::zero(), 0, false);
|
||||

|
||||
/* Both callbacks emit a marker (p_unit_test 1 / 2) followed by a load into v[5]. */
emit_divergent_if_else(
|
||||
program.get(), bld, Operand::c64(1),
|
||||
[&]()
|
||||
{
|
||||
//>> p_unit_test 1
|
||||
//! v1: %0:v[5] = buffer_load_dword %0:s[0-3], %0:v[0], 0
|
||||
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(1));
|
||||
bld.mubuf(aco_opcode::buffer_load_dword, def_v5, desc_s4, op_v0, Operand::zero(), 0,
|
||||
false);
|
||||
},
|
||||
[&]()
|
||||
{
|
||||
//>> p_unit_test 2
|
||||
//! v1: %0:v[5] = buffer_load_dword %0:s[0-3], %0:v[0], 0
|
||||
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(2));
|
||||
bld.mubuf(aco_opcode::buffer_load_dword, def_v5, desc_s4, op_v0, Operand::zero(), 0,
|
||||
false);
|
||||
});
|
||||
/* After the merge: marker 3, then a read of v[4]. The expected output has a wait with
 * count 1 between the marker and the read.
 */
//>> p_unit_test 3
|
||||
//~gfx(10_3|11)! s_waitcnt vmcnt(1)
|
||||
//~gfx12! s_wait_loadcnt imm:1
|
||||
//! p_unit_test %0:v[4]
|
||||
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(3));
|
||||
bld.pseudo(aco_opcode::p_unit_test, Operand(PhysReg(260), v1));
|
||||

|
||||
finish_waitcnt_test();
|
||||
}
|
||||
END_TEST
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue