radv/aco,aco: use scoped barriers

fossil-db (Navi):
Totals from 109 (0.08% of 132058) affected shaders:
SGPRs: 5416 -> 5424 (+0.15%)
CodeSize: 460500 -> 460508 (+0.00%); split: -0.07%, +0.07%
Instrs: 87278 -> 87272 (-0.01%); split: -0.09%, +0.09%
Cycles: 2241996 -> 2241852 (-0.01%); split: -0.04%, +0.04%
VMEM: 33868 -> 35539 (+4.93%); split: +5.14%, -0.20%
SMEM: 7183 -> 7184 (+0.01%); split: +0.36%, -0.35%
VClause: 1857 -> 1882 (+1.35%)
SClause: 2052 -> 2055 (+0.15%); split: -0.05%, +0.19%
Copies: 6377 -> 6380 (+0.05%); split: -0.02%, +0.06%
PreSGPRs: 3391 -> 3392 (+0.03%)

Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/4905>
This commit is contained in:
Rhys Perry 2020-05-13 16:12:39 +01:00 committed by Marge Bot
parent d1f992f3c2
commit cd392a10d0
3 changed files with 51 additions and 12 deletions

View file

@ -6700,6 +6700,24 @@ void visit_global_atomic(isel_context *ctx, nir_intrinsic_instr *instr)
} }
} }
/* Maps a NIR scope value to the corresponding sync_scope value.
 *
 * NIR_SCOPE_NONE is treated the same as NIR_SCOPE_INVOCATION. Any value
 * that is not one of the listed enumerators hits unreachable().
 */
sync_scope translate_nir_scope(nir_scope scope)
{
   switch (scope) {
   case NIR_SCOPE_DEVICE:
      return scope_device;
   case NIR_SCOPE_QUEUE_FAMILY:
      return scope_queuefamily;
   case NIR_SCOPE_WORKGROUP:
      return scope_workgroup;
   case NIR_SCOPE_SUBGROUP:
      return scope_subgroup;
   case NIR_SCOPE_INVOCATION:
   case NIR_SCOPE_NONE:
      /* both the "none" and the per-invocation scope use the narrowest scope */
      return scope_invocation;
   }

   unreachable("invalid scope");
}
void emit_memory_barrier(isel_context *ctx, nir_intrinsic_instr *instr) { void emit_memory_barrier(isel_context *ctx, nir_intrinsic_instr *instr) {
Builder bld(ctx->program, ctx->block); Builder bld(ctx->program, ctx->block);
storage_class all_mem = (storage_class)(storage_buffer | storage_image | storage_atomic_counter | storage_shared); storage_class all_mem = (storage_class)(storage_buffer | storage_image | storage_atomic_counter | storage_shared);
@ -6713,20 +6731,44 @@ void emit_memory_barrier(isel_context *ctx, nir_intrinsic_instr *instr) {
memory_sync_info(all_mem, semantic_acqrel, scope_device)); memory_sync_info(all_mem, semantic_acqrel, scope_device));
break; break;
case nir_intrinsic_memory_barrier_buffer: case nir_intrinsic_memory_barrier_buffer:
case nir_intrinsic_memory_barrier_image:
/* since NIR splits barriers, we have to unify buffer and image barriers
* for now so dEQP-VK.memory_model.message_passing.core11.u32.coherent.
* fence_fence.atomicwrite.device.payload_nonlocal.buffer.guard_nonlocal.image.comp
* passes
*/
bld.barrier(aco_opcode::p_barrier, bld.barrier(aco_opcode::p_barrier,
memory_sync_info((storage_class)(storage_buffer | storage_image), semantic_acqrel, scope_device)); memory_sync_info((storage_class)storage_buffer, semantic_acqrel, scope_device));
case nir_intrinsic_memory_barrier_image:
bld.barrier(aco_opcode::p_barrier,
memory_sync_info((storage_class)storage_image, semantic_acqrel, scope_device));
break; break;
case nir_intrinsic_memory_barrier_tcs_patch: case nir_intrinsic_memory_barrier_tcs_patch:
case nir_intrinsic_memory_barrier_shared: case nir_intrinsic_memory_barrier_shared:
bld.barrier(aco_opcode::p_barrier, bld.barrier(aco_opcode::p_barrier,
memory_sync_info(storage_shared, semantic_acqrel, scope_workgroup)); memory_sync_info(storage_shared, semantic_acqrel, scope_workgroup));
break; break;
case nir_intrinsic_scoped_barrier: {
unsigned semantics = 0;
unsigned storage = 0;
sync_scope mem_scope = translate_nir_scope(nir_intrinsic_memory_scope(instr));
sync_scope exec_scope = translate_nir_scope(nir_intrinsic_execution_scope(instr));
unsigned nir_storage = nir_intrinsic_memory_modes(instr);
if (nir_storage & (nir_var_mem_ssbo | nir_var_mem_global))
storage |= storage_buffer | storage_image; //TODO: split this when NIR gets nir_var_mem_image
if (ctx->shader->info.stage == MESA_SHADER_COMPUTE && (nir_storage & nir_var_mem_shared))
storage |= storage_shared;
if (ctx->shader->info.stage == MESA_SHADER_TESS_CTRL && (nir_storage & nir_var_shader_out))
storage |= storage_shared;
unsigned nir_semantics = nir_intrinsic_memory_semantics(instr);
if (nir_semantics & NIR_MEMORY_ACQUIRE)
semantics |= semantic_acquire;
if (nir_semantics & NIR_MEMORY_RELEASE)
semantics |= semantic_release;
assert(!(nir_semantics & (NIR_MEMORY_MAKE_AVAILABLE | NIR_MEMORY_MAKE_VISIBLE)));
bld.barrier(aco_opcode::p_barrier,
memory_sync_info((storage_class)storage, (memory_semantics)semantics, mem_scope),
exec_scope);
break;
}
default: default:
unreachable("Unimplemented memory barrier intrinsic"); unreachable("Unimplemented memory barrier intrinsic");
break; break;
@ -7568,6 +7610,7 @@ void visit_intrinsic(isel_context *ctx, nir_intrinsic_instr *instr)
case nir_intrinsic_memory_barrier_buffer: case nir_intrinsic_memory_barrier_buffer:
case nir_intrinsic_memory_barrier_image: case nir_intrinsic_memory_barrier_image:
case nir_intrinsic_memory_barrier_shared: case nir_intrinsic_memory_barrier_shared:
case nir_intrinsic_scoped_barrier:
emit_memory_barrier(ctx, instr); emit_memory_barrier(ctx, instr);
break; break;
case nir_intrinsic_load_num_work_groups: { case nir_intrinsic_load_num_work_groups: {

View file

@ -494,11 +494,6 @@ HazardResult perform_hazard_query(hazard_query *query, Instruction *instr, bool
if (first->bar_classes && second->bar_classes) if (first->bar_classes && second->bar_classes)
return hazard_fail_barrier; return hazard_fail_barrier;
/* Don't move memory loads/stores to before control barriers. This is to make
* memory barriers followed by control barriers work. */
if (first->has_control_barrier && (second->access_atomic | second->access_relaxed))
return hazard_fail_barrier;
/* don't move memory loads/stores past potentially aliasing loads/stores */ /* don't move memory loads/stores past potentially aliasing loads/stores */
unsigned aliasing_storage = instr->format == Format::SMEM ? unsigned aliasing_storage = instr->format == Format::SMEM ?
query->aliasing_storage_smem : query->aliasing_storage_smem :

View file

@ -130,6 +130,7 @@ static const struct nir_shader_compiler_options nir_options_aco = {
nir_lower_dsqrt | nir_lower_dsqrt |
nir_lower_drsq | nir_lower_drsq |
nir_lower_ddiv, nir_lower_ddiv,
.use_scoped_barrier = true,
}; };
bool bool