diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
index d3812b56645..2da14876b9b 100644
--- a/src/compiler/nir/nir.h
+++ b/src/compiler/nir/nir.h
@@ -5962,6 +5962,7 @@ typedef bool (*nir_combine_barrier_cb)(
 bool nir_opt_combine_barriers(nir_shader *shader,
                               nir_combine_barrier_cb combine_cb,
                               void *data);
+bool nir_opt_barrier_modes(nir_shader *shader);
 
 bool nir_opt_combine_stores(nir_shader *shader, nir_variable_mode modes);
 
diff --git a/src/compiler/nir/nir_opt_barriers.c b/src/compiler/nir/nir_opt_barriers.c
index dc5c258b898..56e5f49140e 100644
--- a/src/compiler/nir/nir_opt_barriers.c
+++ b/src/compiler/nir/nir_opt_barriers.c
@@ -22,6 +22,8 @@
  */
 
 #include "nir.h"
+#include "nir_worklist.h"
+#include "util/u_vector.h"
 
 static bool
 combine_all_barriers(nir_intrinsic_instr *a, nir_intrinsic_instr *b, void *_)
@@ -99,3 +101,136 @@ nir_opt_combine_barriers(nir_shader *shader,
 
    return progress;
 }
+
+static bool
+barrier_happens_before(const nir_instr *a, const nir_instr *b)
+{
+   if (a->block == b->block)
+      return a->index < b->index;
+
+   return nir_block_dominates(a->block, b->block);
+}
+
+static bool
+nir_opt_barrier_modes_impl(nir_function_impl *impl)
+{
+   bool progress = false;
+
+   nir_instr_worklist *barriers = nir_instr_worklist_create();
+   if (!barriers)
+      return false;
+
+   struct u_vector mem_derefs;
+   if (!u_vector_init(&mem_derefs, 32, sizeof(struct nir_instr *))) {
+      nir_instr_worklist_destroy(barriers);
+      return false;
+   }
+
+   const unsigned all_memory_modes = nir_var_image |
+                                     nir_var_mem_ssbo |
+                                     nir_var_mem_shared |
+                                     nir_var_mem_global;
+
+   nir_foreach_block_safe(block, impl) {
+      nir_foreach_instr_safe(instr, block) {
+         if (instr->type == nir_instr_type_intrinsic) {
+            nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
+
+            if (intrin->intrinsic == nir_intrinsic_barrier)
+               nir_instr_worklist_push_tail(barriers, instr);
+
+         } else if (instr->type == nir_instr_type_deref) {
+            nir_deref_instr *deref = nir_instr_as_deref(instr);
+
+            if (nir_deref_mode_may_be(deref, all_memory_modes) ||
+                glsl_contains_atomic(deref->type)) {
+               nir_deref_instr **tail = u_vector_add(&mem_derefs);
+               *tail = deref;
+            }
+         }
+      }
+   }
+
+   nir_foreach_instr_in_worklist(instr, barriers) {
+      nir_intrinsic_instr *barrier = nir_instr_as_intrinsic(instr);
+
+      const unsigned barrier_modes = nir_intrinsic_memory_modes(barrier);
+      unsigned new_modes = barrier_modes & ~all_memory_modes;
+
+      /* If a barrier dominates all memory accesses for a particular mode (or
+       * there are none), then the barrier cannot affect those accesses. We
+       * can drop that mode from the barrier.
+       *
+       * For each barrier, we look at the list of memory derefs, and see if
+       * the barrier fails to dominate the deref. If so, then there's at
+       * least one memory access that may happen before the barrier, so we
+       * need to keep the mode. Any modes not kept are discarded.
+       */
+      nir_deref_instr **p_deref;
+      u_vector_foreach(p_deref, &mem_derefs) {
+         nir_deref_instr *deref = *p_deref;
+         const unsigned atomic_mode =
+            glsl_contains_atomic(deref->type) ? nir_var_mem_ssbo : 0;
+         const unsigned deref_modes =
+            (deref->modes | atomic_mode) & barrier_modes;
+
+         if (deref_modes &&
+             !barrier_happens_before(&barrier->instr, &deref->instr))
+            new_modes |= deref_modes;
+      }
+
+      /* If we don't need all the modes, update the barrier. */
+      if (barrier_modes != new_modes) {
+         nir_intrinsic_set_memory_modes(barrier, new_modes);
+         progress = true;
+      }
+   }
+
+   nir_instr_worklist_destroy(barriers);
+   u_vector_finish(&mem_derefs);
+
+   return progress;
+}
+
+/**
+ * Reduce barriers to remove unnecessary modes.
+ *
+ * This pass must be called before nir_lower_explicit_io lowers derefs!
+ *
+ * Many shaders issue full memory barriers, which may need to synchronize
+ * access to images, SSBOs, shared local memory, or global memory. However,
+ * many of them only use a subset of those memory types - say, only SSBOs.
+ *
+ * Shaders may also have patterns such as:
+ *
+ * 1. shared local memory access
+ * 2. barrier with full variable modes
+ * 3. more shared local memory access
+ * 4. image access
+ *
+ * In this case, the barrier is needed to ensure synchronization between the
+ * various shared memory operations. Image reads and writes also exist, but
+ * they are all on one side of the barrier, so the barrier is a no-op for
+ * image access. We can drop the image mode from the barrier here, too.
+ */
+bool
+nir_opt_barrier_modes(nir_shader *shader)
+{
+   bool progress = false;
+
+   nir_foreach_function_impl(impl, shader) {
+      nir_metadata_require(impl, nir_metadata_dominance |
+                                 nir_metadata_instr_index);
+
+      if (nir_opt_barrier_modes_impl(impl)) {
+         nir_metadata_preserve(impl, nir_metadata_block_index |
+                                     nir_metadata_dominance |
+                                     nir_metadata_live_defs);
+         progress = true;
+      } else {
+         nir_metadata_preserve(impl, nir_metadata_all);
+      }
+   }
+
+   return progress;
+}
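A minimal usage sketch, not part of the patch above: one way a driver's NIR
optimization path could order the new pass relative to deref lowering. The
wrapper function name, the progress flag, and the variable-mode and
address-format arguments are illustrative assumptions; the one hard
requirement stated in the pass's comment is that it runs while derefs still
exist, i.e. before nir_lower_explicit_io.

   /* Hypothetical driver code, for illustration only. */
   static void
   example_run_barrier_opts(nir_shader *nir)
   {
      bool progress = false;

      /* Drop unnecessary memory modes from barriers while the shader still
       * accesses memory through derefs.
       */
      NIR_PASS(progress, nir, nir_opt_barrier_modes);

      /* Only afterwards lower derefs to explicit I/O. The modes and address
       * format here are placeholders; drivers choose their own.
       */
      NIR_PASS(progress, nir, nir_lower_explicit_io,
               nir_var_mem_ssbo | nir_var_mem_global,
               nir_address_format_64bit_global);
   }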