mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-06-17 00:08:31 +02:00
aco: use split barrier instructions
fossil-db (gfx1201): Totals from 135 (0.06% of 208640) affected shaders: Instrs: 155940 -> 155932 (-0.01%); split: -0.02%, +0.02% CodeSize: 905460 -> 905432 (-0.00%); split: -0.02%, +0.01% Latency: 1910087 -> 1909703 (-0.02%); split: -0.02%, +0.00% InvThroughput: 886321 -> 886280 (-0.00%) Copies: 12025 -> 12024 (-0.01%) VALU: 89681 -> 89679 (-0.00%) VOPD: 177 -> 178 (+0.56%) Signed-off-by: Rhys Perry <pendingchaos02@gmail.com> Reviewed-by: Georg Lehmann <dadschoorse@gmail.com> Reviewed-by: Daniel Schürmann <daniel@schuermann.dev> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/41364>
This commit is contained in:
parent
a95f841125
commit
26b942c306
4 changed files with 27 additions and 14 deletions
|
|
@ -301,6 +301,7 @@ struct aco_export_mrt {
|
|||
void create_fs_dual_src_export_gfx11(isel_context* ctx, const struct aco_export_mrt* mrt0,
|
||||
const struct aco_export_mrt* mrt1);
|
||||
Temp lanecount_to_mask(isel_context* ctx, Temp count, unsigned bit_offset);
|
||||
void emit_barrier(Builder& bld, memory_sync_info sync, sync_scope exec_scope);
|
||||
void build_end_with_regs(isel_context* ctx, std::vector<Operand>& regs);
|
||||
Instruction* add_startpgm(struct isel_context* ctx, bool is_callee = false);
|
||||
void finish_program(isel_context* ctx);
|
||||
|
|
|
|||
|
|
@ -667,6 +667,25 @@ lanecount_to_mask(isel_context* ctx, Temp count, unsigned bit_offset)
|
|||
}
|
||||
}
|
||||
|
||||
void
|
||||
emit_barrier(Builder& bld, memory_sync_info sync, sync_scope exec_scope)
|
||||
{
|
||||
if (bld.program->workgroup_size <= bld.program->wave_size) {
|
||||
exec_scope = scope_subgroup;
|
||||
if (sync.scope == scope_workgroup)
|
||||
sync.scope = scope_subgroup;
|
||||
}
|
||||
|
||||
if (bld.program->gfx_level >= GFX12 && exec_scope == scope_workgroup) {
|
||||
memory_sync_info sync_release(sync.storage, sync.semantics & semantic_release, sync.scope);
|
||||
memory_sync_info sync_acquire(sync.storage, sync.semantics & semantic_acquire, sync.scope);
|
||||
bld.barrier(aco_opcode::p_barrier_signal, sync_release, exec_scope);
|
||||
bld.barrier(aco_opcode::p_barrier_wait, sync_acquire, exec_scope);
|
||||
} else {
|
||||
bld.barrier(aco_opcode::p_barrier, sync, exec_scope);
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
build_end_with_regs(isel_context* ctx, std::vector<Operand>& regs)
|
||||
{
|
||||
|
|
|
|||
|
|
@ -1522,9 +1522,8 @@ select_shader(isel_context& ctx, nir_shader* nir, const bool need_startpgm, cons
|
|||
? scope_subgroup
|
||||
: scope_workgroup;
|
||||
|
||||
Builder(ctx.program, ctx.block)
|
||||
.barrier(aco_opcode::p_barrier, memory_sync_info(storage_shared, semantic_acqrel, scope),
|
||||
scope);
|
||||
Builder bld(ctx.program, ctx.block);
|
||||
emit_barrier(bld, memory_sync_info(storage_shared, semantic_acqrel, scope), scope);
|
||||
}
|
||||
|
||||
nir_function_impl* func = nir_shader_get_entrypoint(nir);
|
||||
|
|
|
|||
|
|
@ -2895,7 +2895,7 @@ translate_nir_scope(mesa_scope scope)
|
|||
}
|
||||
|
||||
void
|
||||
emit_barrier(isel_context* ctx, nir_intrinsic_instr* instr)
|
||||
visit_barrier(isel_context* ctx, nir_intrinsic_instr* instr)
|
||||
{
|
||||
Builder bld(ctx->program, ctx->block);
|
||||
|
||||
|
|
@ -2948,15 +2948,9 @@ emit_barrier(isel_context* ctx, nir_intrinsic_instr* instr)
|
|||
assert(!(nir_semantics & (NIR_MEMORY_MAKE_AVAILABLE | NIR_MEMORY_MAKE_VISIBLE)));
|
||||
assert(exec_scope != scope_workgroup || workgroup_scope_allowed);
|
||||
|
||||
if (ctx->program->workgroup_size <= ctx->program->wave_size) {
|
||||
exec_scope = scope_subgroup;
|
||||
if (mem_scope == scope_workgroup)
|
||||
mem_scope = scope_subgroup;
|
||||
}
|
||||
|
||||
bld.barrier(aco_opcode::p_barrier,
|
||||
memory_sync_info((storage_class)storage, (memory_semantics)semantics, mem_scope),
|
||||
exec_scope);
|
||||
emit_barrier(bld,
|
||||
memory_sync_info((storage_class)storage, (memory_semantics)semantics, mem_scope),
|
||||
exec_scope);
|
||||
}
|
||||
|
||||
/* The two 32 wide halves of a gfx10+ wave64 LDS instruction might be executed interleaved
|
||||
|
|
@ -4056,7 +4050,7 @@ visit_intrinsic(isel_context* ctx, nir_intrinsic_instr* instr)
|
|||
case nir_intrinsic_ssbo_atomic_swap: visit_atomic_ssbo(ctx, instr); break;
|
||||
case nir_intrinsic_load_scratch: visit_load_scratch(ctx, instr); break;
|
||||
case nir_intrinsic_store_scratch: visit_store_scratch(ctx, instr); break;
|
||||
case nir_intrinsic_barrier: emit_barrier(ctx, instr); break;
|
||||
case nir_intrinsic_barrier: visit_barrier(ctx, instr); break;
|
||||
case nir_intrinsic_ddx:
|
||||
case nir_intrinsic_ddy:
|
||||
case nir_intrinsic_ddx_fine:
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue