mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-09 08:58:02 +02:00
aco/gfx10: skip waitcnts or use vm_vsrc(0) for workgroup vmem barriers
Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Georg Lehmann <dadschoorse@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/36491>
This commit is contained in:
parent
145b178de2
commit
ac882985c0
3 changed files with 58 additions and 14 deletions
|
|
@ -479,37 +479,45 @@ setup_barrier(wait_ctx& ctx, wait_imm& imm, memory_sync_info sync, bool is_acqui
|
|||
}
|
||||
|
||||
void
|
||||
finish_barrier_internal(wait_ctx& ctx, wait_imm& imm, Instruction* instr, struct barrier_info* info,
|
||||
unsigned storage_idx)
|
||||
finish_barrier_internal(wait_ctx& ctx, wait_imm& imm, depctr_wait& depctr, Instruction* instr,
|
||||
struct barrier_info* info, unsigned storage_idx)
|
||||
{
|
||||
uint16_t events = info->events[storage_idx];
|
||||
bool vm_vsrc = false;
|
||||
|
||||
if (info->scope[storage_idx] <= scope_workgroup) {
|
||||
bool is_vmem = instr->isVMEM() || (instr->isFlatLike() && !instr->flatlike().may_use_lds);
|
||||
bool is_barrier = instr->isBarrier();
|
||||
bool is_barrier = instr->isBarrier(); /* This is only called for control barriers. */
|
||||
|
||||
/* Until GFX11, in non-WGP, the L1 (L0 on GFX10+) cache keeps all memory operations
|
||||
* in-order for the same workgroup */
|
||||
if ((is_vmem || is_barrier) && ctx.gfx_level < GFX11 && !ctx.program->wgp_mode)
|
||||
/* In non-WGP, the L1 (L0 on GFX10+) cache keeps all memory operations in-order for the same
|
||||
* workgroup */
|
||||
bool has_vmem_events = events & (event_vmem | event_vmem_store);
|
||||
if (has_vmem_events && (is_vmem || is_barrier) && !ctx.program->wgp_mode) {
|
||||
events &= ~(event_vmem | event_vmem_store);
|
||||
vm_vsrc |= is_barrier && ctx.gfx_level >= GFX10;
|
||||
}
|
||||
}
|
||||
|
||||
if (events)
|
||||
imm.combine(info->imm[storage_idx]);
|
||||
if (vm_vsrc)
|
||||
depctr.vm_vsrc = 0;
|
||||
}
|
||||
|
||||
void
|
||||
finish_barriers(wait_ctx& ctx, wait_imm& imm, Instruction* instr, memory_sync_info sync)
|
||||
finish_barriers(wait_ctx& ctx, wait_imm& imm, depctr_wait& depctr, Instruction* instr,
|
||||
memory_sync_info sync)
|
||||
{
|
||||
if (ctx.bar_nonempty & (1 << barrier_info_release)) {
|
||||
uint16_t storage_release =
|
||||
is_atomic_or_control_instr(ctx.program, instr, sync, semantic_release);
|
||||
u_foreach_bit (i, storage_release & ctx.bar[barrier_info_release].storage)
|
||||
finish_barrier_internal(ctx, imm, instr, &ctx.bar[barrier_info_release], i);
|
||||
finish_barrier_internal(ctx, imm, depctr, instr, &ctx.bar[barrier_info_release], i);
|
||||
}
|
||||
if (ctx.bar_nonempty & (1 << barrier_info_acquire)) {
|
||||
uint16_t storage_acquire = (sync.semantics & semantic_private) ? 0 : sync.storage;
|
||||
u_foreach_bit (i, storage_acquire & ctx.bar[barrier_info_acquire].storage)
|
||||
finish_barrier_internal(ctx, imm, instr, &ctx.bar[barrier_info_acquire], i);
|
||||
finish_barrier_internal(ctx, imm, depctr, instr, &ctx.bar[barrier_info_acquire], i);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -548,7 +556,8 @@ update_barrier_info_for_wait(wait_ctx& ctx, unsigned idx, wait_imm imm)
|
|||
}
|
||||
|
||||
void
|
||||
kill(wait_imm& imm, Instruction* instr, wait_ctx& ctx, memory_sync_info sync_info)
|
||||
kill(wait_imm& imm, depctr_wait& depctr, Instruction* instr, wait_ctx& ctx,
|
||||
memory_sync_info sync_info)
|
||||
{
|
||||
if (instr->opcode == aco_opcode::s_setpc_b64 || (debug_flags & DEBUG_FORCE_WAITCNT)) {
|
||||
/* Force emitting waitcnt states right after the instruction if there is
|
||||
|
|
@ -585,7 +594,7 @@ kill(wait_imm& imm, Instruction* instr, wait_ctx& ctx, memory_sync_info sync_inf
|
|||
setup_barrier(ctx, imm, sync_info, false);
|
||||
}
|
||||
|
||||
finish_barriers(ctx, imm, instr, sync_info);
|
||||
finish_barriers(ctx, imm, depctr, instr, sync_info);
|
||||
|
||||
if (!imm.empty()) {
|
||||
if (ctx.pending_flat_vm && imm.vm != wait_imm::unset_counter)
|
||||
|
|
@ -866,6 +875,14 @@ emit_waitcnt(wait_ctx& ctx, std::vector<aco_ptr<Instruction>>& instructions, wai
|
|||
imm.build_waitcnt(bld);
|
||||
}
|
||||
|
||||
void
|
||||
emit_depctr(wait_ctx& ctx, std::vector<aco_ptr<Instruction>>& instructions, depctr_wait& depctr)
|
||||
{
|
||||
Builder bld(ctx.program, &instructions);
|
||||
bld.sopp(aco_opcode::s_waitcnt_depctr, depctr.pack());
|
||||
depctr = depctr_wait();
|
||||
}
|
||||
|
||||
bool
|
||||
check_clause_raw(std::bitset<512>& regs_written, Instruction* instr)
|
||||
{
|
||||
|
|
@ -892,15 +909,19 @@ handle_block(Program* program, Block& block, wait_ctx& ctx)
|
|||
std::vector<aco_ptr<Instruction>> new_instructions;
|
||||
|
||||
wait_imm queued_imm;
|
||||
depctr_wait queued_depctr;
|
||||
|
||||
size_t clause_end = 0;
|
||||
for (size_t i = 0; i < block.instructions.size(); i++) {
|
||||
aco_ptr<Instruction>& instr = block.instructions[i];
|
||||
|
||||
bool is_wait = queued_imm.unpack(ctx.gfx_level, instr.get());
|
||||
bool is_wait = queued_imm.unpack(ctx.gfx_level, instr.get()) ||
|
||||
instr->opcode == aco_opcode::s_waitcnt_depctr;
|
||||
if (instr->opcode == aco_opcode::s_waitcnt_depctr)
|
||||
queued_depctr = parse_depctr_wait(instr.get());
|
||||
|
||||
memory_sync_info sync_info = get_sync_info(instr.get());
|
||||
kill(queued_imm, instr.get(), ctx, sync_info);
|
||||
kill(queued_imm, queued_depctr, instr.get(), ctx, sync_info);
|
||||
|
||||
/* At the start of a possible clause, also emit waitcnts for each instruction to avoid
|
||||
* splitting the clause.
|
||||
|
|
@ -920,7 +941,7 @@ handle_block(Program* program, Block& block, wait_ctx& ctx)
|
|||
if (!check_clause_raw(*regs_written, next))
|
||||
break;
|
||||
|
||||
kill(queued_imm, next, ctx, get_sync_info(next));
|
||||
kill(queued_imm, queued_depctr, next, ctx, get_sync_info(next));
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -934,6 +955,8 @@ handle_block(Program* program, Block& block, wait_ctx& ctx)
|
|||
|
||||
if (!queued_imm.empty())
|
||||
emit_waitcnt(ctx, new_instructions, queued_imm);
|
||||
if (!queued_depctr.empty())
|
||||
emit_depctr(ctx, new_instructions, queued_depctr);
|
||||
|
||||
bool is_ordered_count_acquire =
|
||||
instr->opcode == aco_opcode::ds_ordered_count &&
|
||||
|
|
@ -956,6 +979,8 @@ handle_block(Program* program, Block& block, wait_ctx& ctx)
|
|||
|
||||
if (!queued_imm.empty())
|
||||
emit_waitcnt(ctx, new_instructions, queued_imm);
|
||||
if (!queued_depctr.empty())
|
||||
emit_depctr(ctx, new_instructions, queued_depctr);
|
||||
|
||||
block.instructions.swap(new_instructions);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1641,6 +1641,21 @@ parse_depctr_wait(const Instruction* instr)
|
|||
return res;
|
||||
}
|
||||
|
||||
uint16_t
|
||||
depctr_wait::pack() const
|
||||
{
|
||||
uint16_t imm = 0;
|
||||
imm |= (va_vdst & 0xf) << 12;
|
||||
imm |= (va_sdst & 0x7) << 9;
|
||||
imm |= (va_ssrc & 0x1) << 8;
|
||||
imm |= (hold_cnt & 0x1) << 7;
|
||||
imm |= 0x3 << 5; /* don't know what this is, if anything */
|
||||
imm |= (vm_vsrc & 0x7) << 2;
|
||||
imm |= (va_vcc & 0x1) << 1;
|
||||
imm |= (sa_sdst & 0x1);
|
||||
return imm;
|
||||
}
|
||||
|
||||
bool
|
||||
dealloc_vgprs(Program* program)
|
||||
{
|
||||
|
|
|
|||
|
|
@ -1976,6 +1976,10 @@ struct depctr_wait {
|
|||
};
|
||||
unsigned packed = -1;
|
||||
};
|
||||
|
||||
bool empty() const { return packed == (unsigned)-1; }
|
||||
|
||||
uint16_t pack() const;
|
||||
};
|
||||
|
||||
depctr_wait parse_depctr_wait(const Instruction* instr);
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue