nir: Add an optimization pass to reduce barrier modes

Many shaders issue full memory barriers, which may need to synchronize
access to images, SSBOs, shared local memory, or global memory.
However, many such shaders only use a subset of those memory types -
say, only SSBOs.

Shaders may also have patterns such as:

   1. shared local memory access
   2. barrier with full variable modes
   3. more shared local memory access
   4. image access

In this case, the barrier is needed to ensure synchronization between
the various shared local memory operations.  The shader does access
images as well, but all of those accesses fall on one side of the
barrier, so the barrier is a no-op for image access.  We can drop the
image mode from this barrier too.
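
To make the intended result concrete, here is a sketch in terms of the
NIR mode flags the pass operates on; the "before"/"after" variables are
illustrative names for this write-up, not part of the commit:

   /* Modes on the barrier in step 2, before the pass runs: */
   unsigned before = nir_var_image | nir_var_mem_ssbo |
                     nir_var_mem_shared | nir_var_mem_global;

   /* Only shared local memory is accessed on both sides of the
    * barrier, so the pass reduces the barrier's memory modes to:
    */
   unsigned after = nir_var_mem_shared;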

Reviewed-by: Alyssa Rosenzweig <alyssa@rosenzweig.io>
Reviewed-by: Ian Romanick <ian.d.romanick@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/24842>
Author: Kenneth Graunke
Date: 2023-08-21 18:53:20 -07:00 (committed by Marge Bot)
parent 1c3706fc28
commit 7dd897e1cd
2 changed files with 136 additions and 0 deletions

src/compiler/nir/nir.h

@@ -5962,6 +5962,7 @@ typedef bool (*nir_combine_barrier_cb)(
bool nir_opt_combine_barriers(nir_shader *shader,
                              nir_combine_barrier_cb combine_cb,
                              void *data);
bool nir_opt_barrier_modes(nir_shader *shader);
bool nir_opt_combine_stores(nir_shader *shader, nir_variable_mode modes);

src/compiler/nir/nir_opt_barriers.c

@@ -22,6 +22,8 @@
 */

#include "nir.h"
#include "nir_worklist.h"
#include "util/u_vector.h"

static bool
combine_all_barriers(nir_intrinsic_instr *a, nir_intrinsic_instr *b, void *_)
@@ -99,3 +101,136 @@ nir_opt_combine_barriers(nir_shader *shader,
   return progress;
}

static bool
barrier_happens_before(const nir_instr *a, const nir_instr *b)
{
   /* Within a block, compare the per-instruction indices provided by
    * nir_metadata_instr_index; across blocks, fall back to dominance.
    */
   if (a->block == b->block)
      return a->index < b->index;

   return nir_block_dominates(a->block, b->block);
}

static bool
nir_opt_barrier_modes_impl(nir_function_impl *impl)
{
   bool progress = false;

   nir_instr_worklist *barriers = nir_instr_worklist_create();
   if (!barriers)
      return false;

   struct u_vector mem_derefs;
   if (!u_vector_init(&mem_derefs, 32, sizeof(struct nir_instr *))) {
      nir_instr_worklist_destroy(barriers);
      return false;
   }

   const unsigned all_memory_modes = nir_var_image |
                                     nir_var_mem_ssbo |
                                     nir_var_mem_shared |
                                     nir_var_mem_global;

   /* Gather all barriers and all derefs of barrier-relevant memory in a
    * single walk over the shader.
    */
   nir_foreach_block_safe(block, impl) {
      nir_foreach_instr_safe(instr, block) {
         if (instr->type == nir_instr_type_intrinsic) {
            nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);

            if (intrin->intrinsic == nir_intrinsic_barrier)
               nir_instr_worklist_push_tail(barriers, instr);
         } else if (instr->type == nir_instr_type_deref) {
            nir_deref_instr *deref = nir_instr_as_deref(instr);

            if (nir_deref_mode_may_be(deref, all_memory_modes) ||
                glsl_contains_atomic(deref->type)) {
               nir_deref_instr **tail = u_vector_add(&mem_derefs);
               *tail = deref;
            }
         }
      }
   }

   nir_foreach_instr_in_worklist(instr, barriers) {
      nir_intrinsic_instr *barrier = nir_instr_as_intrinsic(instr);
      const unsigned barrier_modes = nir_intrinsic_memory_modes(barrier);

      /* Start from the non-memory modes, which are always kept. */
      unsigned new_modes = barrier_modes & ~all_memory_modes;

      /* If a barrier dominates all memory accesses for a particular mode (or
       * there are none), then the barrier cannot affect those accesses.  We
       * can drop that mode from the barrier.
       *
       * For each barrier, we look at the list of memory derefs, and see if
       * the barrier fails to dominate the deref.  If so, then there's at
       * least one memory access that may happen before the barrier, so we
       * need to keep the mode.  Any modes not kept are discarded.
       */
      nir_deref_instr **p_deref;
      u_vector_foreach(p_deref, &mem_derefs) {
         nir_deref_instr *deref = *p_deref;

         /* Atomic counters behave like SSBO accesses for barrier purposes,
          * so count derefs of atomic types as SSBO accesses as well.
          */
         const unsigned atomic_mode =
            glsl_contains_atomic(deref->type) ? nir_var_mem_ssbo : 0;
         const unsigned deref_modes =
            (deref->modes | atomic_mode) & barrier_modes;

         if (deref_modes &&
             !barrier_happens_before(&barrier->instr, &deref->instr))
            new_modes |= deref_modes;
      }

      /* If we don't need all the modes, update the barrier. */
      if (barrier_modes != new_modes) {
         nir_intrinsic_set_memory_modes(barrier, new_modes);
         progress = true;
      }
   }

   nir_instr_worklist_destroy(barriers);
   u_vector_finish(&mem_derefs);

   return progress;
}
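
/* Worked example of the rule above (illustrative, not part of the pass):
 *
 *    deref A: shared local memory access
 *    barrier with memory modes = shared | image
 *    deref B: shared local memory access
 *    deref C: image access
 *
 * Deref A intersects the barrier's modes and is not dominated by the
 * barrier, so nir_var_mem_shared is kept.  Every image deref (C) is
 * dominated by the barrier, so nir_var_image is dropped.
 */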

/**
 * Reduce barriers to remove unnecessary modes.
 *
 * This pass must be called before nir_lower_explicit_io lowers derefs!
 *
 * Many shaders issue full memory barriers, which may need to synchronize
 * access to images, SSBOs, shared local memory, or global memory.  However,
 * many such shaders only use a subset of those memory types - say, only
 * SSBOs.
 *
 * Shaders may also have patterns such as:
 *
 *    1. shared local memory access
 *    2. barrier with full variable modes
 *    3. more shared local memory access
 *    4. image access
 *
 * In this case, the barrier is needed to ensure synchronization between the
 * various shared local memory operations.  The shader does access images as
 * well, but all of those accesses fall on one side of the barrier, so the
 * barrier is a no-op for image access.  We can drop the image mode from the
 * barrier in this case too.
 */
bool
nir_opt_barrier_modes(nir_shader *shader)
{
   bool progress = false;

   nir_foreach_function_impl(impl, shader) {
      nir_metadata_require(impl, nir_metadata_dominance |
                                 nir_metadata_instr_index);

      if (nir_opt_barrier_modes_impl(impl)) {
         nir_metadata_preserve(impl, nir_metadata_block_index |
                                     nir_metadata_dominance |
                                     nir_metadata_live_defs);
         progress = true;
      } else {
         nir_metadata_preserve(impl, nir_metadata_all);
      }
   }

   return progress;
}
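
For context, a driver would invoke the new pass like any other NIR
optimization, while derefs are still present.  The snippet below is a
hypothetical sketch, not part of this commit; the optimize_shader
wrapper and its position in a compile pipeline are assumptions:

   #include "nir.h"

   /* Hypothetical example: run nir_opt_barrier_modes before
    * nir_lower_explicit_io, since the pass walks deref instructions.
    */
   static bool
   optimize_shader(nir_shader *nir)
   {
      bool progress = false;

      /* NIR_PASS accumulates the pass's return value into 'progress'. */
      NIR_PASS(progress, nir, nir_opt_barrier_modes);

      return progress;
   }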