microsoft/compiler: Add a pass for promoting ACCESS_COHERENT on loads/stores

DXIL doesn't have instruction-level coherency. We have 3 options:
1. Promote the instruction to an atomic instruction. We can only do this
   for 32-bit or 64-bit ops.
2. If using bindless, mark the local resource declaration as globally-coherent.
3. If not using bindless, add globally-coherent to the global resource declaration.

This pass uses all 3 of these strategies: it stops at the intrinsic level (option 1)
for types that atomics support, and otherwise marks the global resource declaration
as coherent. That declaration goes unused when we're doing bindless, in which case
the coherency is taken from the instruction's local declaration instead.

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/27348>
This commit is contained in:
Jesse Natalie 2024-01-29 14:34:35 -08:00 committed by Marge Bot
parent b74cd405d3
commit 003d2da2dc
2 changed files with 139 additions and 0 deletions

View file

@ -2671,3 +2671,141 @@ dxil_nir_guess_image_formats(nir_shader *s)
NULL);
return progress;
}
/* Mark every variable (in the given modes) whose binding/descriptor-set pair
 * matches `binding` as ACCESS_COHERENT, so the resource declaration itself
 * carries the coherency that DXIL can't express per-instruction. */
static void
set_binding_variables_coherent(nir_shader *s, nir_binding binding, nir_variable_mode modes)
{
   nir_foreach_variable_with_modes(v, s, modes) {
      bool binding_matches = v->data.binding == binding.binding;
      bool set_matches = v->data.descriptor_set == binding.desc_set;
      if (binding_matches && set_matches)
         v->data.access |= ACCESS_COHERENT;
   }
}
/* Given a deref used by a coherent access, mark the underlying variable(s)
 * as ACCESS_COHERENT. Handles both direct variable derefs and the cast form
 * produced by lowered Vulkan descriptor loads. */
static void
set_deref_variables_coherent(nir_shader *s, nir_deref_instr *deref)
{
   /* Walk up the deref chain until we reach either the variable or a cast. */
   for (; deref->deref_type != nir_deref_type_var &&
          deref->deref_type != nir_deref_type_cast;
        deref = nir_deref_instr_parent(deref))
      ;

   if (deref->deref_type == nir_deref_type_var) {
      /* Direct access: flag the declaration itself. */
      deref->var->data.access |= ACCESS_COHERENT;
      return;
   }

   /* For derefs with casts, we only support pre-lowered Vulkan accesses */
   assert(deref->deref_type == nir_deref_type_cast);
   nir_intrinsic_instr *cast_src =
      nir_instr_as_intrinsic(deref->parent.ssa->parent_instr);
   assert(cast_src->intrinsic == nir_intrinsic_load_vulkan_descriptor);
   set_binding_variables_coherent(s, nir_chase_binding(cast_src->src[0]),
                                  nir_var_mem_ssbo);
}
/* Build an atomic that has the same memory effect as the given load/store:
 * loads become an atomic iadd of 0 (reads memory, writes it back unchanged)
 * and stores become an atomic exchange (writes the value, result ignored).
 * Returns NULL for intrinsics this pass doesn't handle. */
static nir_def *
get_atomic_for_load_store(nir_builder *b, nir_intrinsic_instr *intr, unsigned bit_size)
{
   nir_def *zero = nir_imm_intN_t(b, 0, bit_size);

   switch (intr->intrinsic) {
   /* Loads -> "add 0" atomics. */
   case nir_intrinsic_load_deref:
      return nir_deref_atomic(b, bit_size, intr->src[0].ssa, zero,
                              .atomic_op = nir_atomic_op_iadd);
   case nir_intrinsic_load_ssbo:
      return nir_ssbo_atomic(b, bit_size, intr->src[0].ssa, intr->src[1].ssa,
                             zero, .atomic_op = nir_atomic_op_iadd);
   case nir_intrinsic_image_deref_load:
      return nir_image_deref_atomic(b, bit_size, intr->src[0].ssa,
                                    intr->src[1].ssa, intr->src[2].ssa, zero,
                                    .atomic_op = nir_atomic_op_iadd);
   case nir_intrinsic_image_load:
      return nir_image_atomic(b, bit_size, intr->src[0].ssa, intr->src[1].ssa,
                              intr->src[2].ssa, zero,
                              .atomic_op = nir_atomic_op_iadd);

   /* Stores -> exchange atomics. */
   case nir_intrinsic_store_deref:
      return nir_deref_atomic(b, bit_size, intr->src[0].ssa, intr->src[1].ssa,
                              .atomic_op = nir_atomic_op_xchg);
   case nir_intrinsic_store_ssbo:
      return nir_ssbo_atomic(b, bit_size, intr->src[1].ssa, intr->src[2].ssa,
                             intr->src[0].ssa, .atomic_op = nir_atomic_op_xchg);
   case nir_intrinsic_image_deref_store:
      return nir_image_deref_atomic(b, bit_size, intr->src[0].ssa,
                                    intr->src[1].ssa, intr->src[2].ssa,
                                    intr->src[3].ssa,
                                    .atomic_op = nir_atomic_op_xchg);
   case nir_intrinsic_image_store:
      return nir_image_atomic(b, bit_size, intr->src[0].ssa, intr->src[1].ssa,
                              intr->src[2].ssa, intr->src[3].ssa,
                              .atomic_op = nir_atomic_op_xchg);

   default:
      return NULL;
   }
}
/* Callback for nir_shader_intrinsics_pass: handle one coherent load/store.
 *
 * DXIL has no instruction-level coherency, so for an access tagged
 * ACCESS_COHERENT we either:
 *  - promote the load/store to an equivalent atomic (see
 *    get_atomic_for_load_store) — only possible for scalar values of at
 *    least 32 bits, or
 *  - fall back to marking the resource's declaration as coherent via
 *    set_deref_variables_coherent()/set_binding_variables_coherent().
 *
 * Returns true iff the instruction was replaced by an atomic.
 */
static bool
lower_coherent_load_store(nir_builder *b, nir_intrinsic_instr *intr, void *context)
{
   /* Only intrinsics that carry an access qualifier with COHERENT set. */
   if (!nir_intrinsic_has_access(intr) || (nir_intrinsic_access(intr) & ACCESS_COHERENT) == 0)
      return false;

   nir_def *atomic_def = NULL;
   /* The replacement atomic is built right before the original access. */
   b->cursor = nir_before_instr(&intr->instr);
   switch (intr->intrinsic) {
   case nir_intrinsic_load_deref:
   case nir_intrinsic_load_ssbo:
   case nir_intrinsic_image_deref_load:
   case nir_intrinsic_image_load: {
      /* Sub-32-bit or vector results can't be expressed as an atomic:
       * mark the resource declaration coherent instead. */
      if (intr->def.bit_size < 32 || intr->def.num_components > 1) {
         if (intr->intrinsic == nir_intrinsic_load_deref)
            set_deref_variables_coherent(b->shader, nir_src_as_deref(intr->src[0]));
         else {
            /* NOTE(review): for a non-constant binding src this leaves
             * binding 0 in place — presumably only reachable in configs
             * where that is harmless; confirm against callers. */
            nir_binding binding = {0};
            if (nir_src_is_const(intr->src[0]))
               binding.binding = nir_src_as_uint(intr->src[0]);
            set_binding_variables_coherent(b->shader, binding,
                                           intr->intrinsic == nir_intrinsic_load_ssbo ? nir_var_mem_ssbo : nir_var_image);
         }
         return false;
      }
      atomic_def = get_atomic_for_load_store(b, intr, intr->def.bit_size);
      /* The atomic's result value replaces the load's result. */
      nir_def_rewrite_uses(&intr->def, atomic_def);
      break;
   }
   case nir_intrinsic_store_deref:
   case nir_intrinsic_store_ssbo:
   case nir_intrinsic_image_deref_store:
   case nir_intrinsic_image_store: {
      /* Source index of the resource and of the stored value differ per
       * intrinsic (store_ssbo has value first, the others resource first). */
      int resource_idx = intr->intrinsic == nir_intrinsic_store_ssbo ? 1 : 0;
      int value_idx = intr->intrinsic == nir_intrinsic_store_ssbo ? 0 :
         intr->intrinsic == nir_intrinsic_store_deref ? 1 : 3;
      /* Effective component count: honor the write mask when present. */
      unsigned num_components = nir_intrinsic_has_write_mask(intr) ?
         util_bitcount(nir_intrinsic_write_mask(intr)) : intr->src[value_idx].ssa->num_components;
      /* Same restriction as loads: only scalar >=32-bit values can become
       * atomics; otherwise flag the declaration. */
      if (intr->src[value_idx].ssa->bit_size < 32 || num_components > 1) {
         if (intr->intrinsic == nir_intrinsic_store_deref)
            set_deref_variables_coherent(b->shader, nir_src_as_deref(intr->src[resource_idx]));
         else {
            nir_binding binding = {0};
            if (nir_src_is_const(intr->src[resource_idx]))
               binding.binding = nir_src_as_uint(intr->src[resource_idx]);
            set_binding_variables_coherent(b->shader, binding,
                                           intr->intrinsic == nir_intrinsic_store_ssbo ? nir_var_mem_ssbo : nir_var_image);
         }
         return false;
      }
      atomic_def = get_atomic_for_load_store(b, intr, intr->src[value_idx].ssa->bit_size);
      break;
   }
   default:
      return false;
   }

   /* Copy the indices the atomic shares with the original access (access
    * flags, image dim/array-ness, format, range base) before dropping it. */
   nir_intrinsic_instr *atomic = nir_instr_as_intrinsic(atomic_def->parent_instr);
   nir_intrinsic_set_access(atomic, nir_intrinsic_access(intr));
   if (nir_intrinsic_has_image_dim(intr))
      nir_intrinsic_set_image_dim(atomic, nir_intrinsic_image_dim(intr));
   if (nir_intrinsic_has_image_array(intr))
      nir_intrinsic_set_image_array(atomic, nir_intrinsic_image_array(intr));
   if (nir_intrinsic_has_format(intr))
      nir_intrinsic_set_format(atomic, nir_intrinsic_format(intr));
   if (nir_intrinsic_has_range_base(intr))
      nir_intrinsic_set_range_base(atomic, nir_intrinsic_range_base(intr));
   nir_instr_remove(&intr->instr);
   return true;
}
/* Run lower_coherent_load_store over every intrinsic in the shader.
 * Returns true if any coherent access was promoted to an atomic. */
bool
dxil_nir_lower_coherent_loads_and_stores(nir_shader *s)
{
   const nir_metadata preserved = nir_metadata_block_index |
                                  nir_metadata_dominance |
                                  nir_metadata_loop_analysis;
   return nir_shader_intrinsics_pass(s, lower_coherent_load_store,
                                     preserved, NULL);
}

View file

@ -86,6 +86,7 @@ bool dxil_nir_move_consts(nir_shader *s);
struct dxil_module;
bool dxil_nir_analyze_io_dependencies(struct dxil_module *mod, nir_shader *s);
bool dxil_nir_guess_image_formats(nir_shader *s);
bool dxil_nir_lower_coherent_loads_and_stores(nir_shader *s);
#ifdef __cplusplus
}