nir: Allow atomic intrinsics to have multiple components

v2 (Sid): Handle image and ssbo atomics having only one component in
          ir3, glsl, pco, and zink

Co-authored-by: Sid Pranjale <sidpranjale127@protonmail.com>
Reviewed-by: Mel Henning <mhenning@darkrefraction.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/37888>
This commit is contained in:
Faith Ekstrand 2024-10-14 11:01:32 -05:00 committed by Marge Bot
parent 28aba5fba4
commit c6a98cb2d0
10 changed files with 54 additions and 33 deletions

View file

@ -1371,6 +1371,7 @@ nir_visitor::visit(ir_call *ir)
/* Atomic result */
assert(ir->return_deref);
instr->num_components = 1;
if (glsl_type_is_integer_64(ir->return_deref->type)) {
nir_def_init(&instr->instr, &instr->def,
ir->return_deref->type->vector_elements, 64);
@ -1438,6 +1439,7 @@ nir_visitor::visit(ir_call *ir)
if (op == nir_intrinsic_image_deref_atomic ||
op == nir_intrinsic_image_deref_atomic_swap) {
nir_intrinsic_set_atomic_op(instr, atomic_op);
instr->num_components = 1;
}
instr->src[0] = nir_src_for_ssa(&deref->def);

View file

@ -838,8 +838,8 @@ def image(name, src_comp=[], extra_indices=[], **kwargs):
image("load", src_comp=[4, 1, 1], extra_indices=[DEST_TYPE], dest_comp=0, flags=[CAN_ELIMINATE])
image("sparse_load", src_comp=[4, 1, 1], extra_indices=[DEST_TYPE], dest_comp=0, flags=[CAN_ELIMINATE])
image("store", src_comp=[4, 1, 0, 1], extra_indices=[SRC_TYPE])
image("atomic", src_comp=[4, 1, 1], dest_comp=1, extra_indices=[ATOMIC_OP])
image("atomic_swap", src_comp=[4, 1, 1, 1], dest_comp=1, extra_indices=[ATOMIC_OP])
image("atomic", src_comp=[4, 1, 0], dest_comp=0, extra_indices=[ATOMIC_OP])
image("atomic_swap", src_comp=[4, 1, 0, 0], dest_comp=0, extra_indices=[ATOMIC_OP])
image("size", dest_comp=0, src_comp=[1], flags=[CAN_ELIMINATE, CAN_REORDER])
image("levels", dest_comp=1, flags=[CAN_ELIMINATE, CAN_REORDER])
image("samples", dest_comp=1, flags=[CAN_ELIMINATE, CAN_REORDER])
@ -943,30 +943,34 @@ intrinsic("load_vulkan_descriptor", src_comp=[-1], dest_comp=0,
# PCO global variants use a vec3 for the memory address and data, where component X
# has the low 32 address bits, component Y has the high 32 address bits, and component Z
# has the data parameter.
#
# Note on vector atomics:
# These operate independently on each component rather than on the vector
# as a whole. Each component is updated atomically by itself, so other
# threads may observe some components already updated while others still
# hold their previous value.
intrinsic("deref_atomic", src_comp=[-1, 0], dest_comp=0, indices=[ACCESS, ATOMIC_OP])
intrinsic("ssbo_atomic", src_comp=[-1, 1, 0], dest_comp=0, indices=[ACCESS, ATOMIC_OP, OFFSET_SHIFT])
intrinsic("shared_atomic", src_comp=[1, 0], dest_comp=0, indices=[BASE, ATOMIC_OP])
intrinsic("shared_atomic_nv", src_comp=[1, 1, 0], dest_comp=0, indices=[BASE, ATOMIC_OP, OFFSET_SHIFT_NV])
intrinsic("task_payload_atomic", src_comp=[1, 0], dest_comp=0, indices=[BASE, ATOMIC_OP])
intrinsic("global_atomic", src_comp=[1, 0], dest_comp=0, indices=[ATOMIC_OP])
intrinsic("global_atomic_2x32", src_comp=[2, 0], dest_comp=0, indices=[ATOMIC_OP])
intrinsic("global_atomic_amd", src_comp=[1, 1, 0], dest_comp=0, indices=[BASE, ATOMIC_OP])
intrinsic("global_atomic_agx", src_comp=[1, 1, 0], dest_comp=0, indices=[ATOMIC_OP, SIGN_EXTEND])
intrinsic("global_atomic_nv", src_comp=[1, 1, 0], dest_comp=0, indices=[BASE, ATOMIC_OP])
intrinsic("global_atomic_pco", src_comp=[3], dest_comp=0, indices=[ATOMIC_OP], bit_sizes=[32])
intrinsic("deref_atomic", src_comp=[-1, 1], dest_comp=1, indices=[ACCESS, ATOMIC_OP])
intrinsic("ssbo_atomic", src_comp=[-1, 1, 1], dest_comp=1, indices=[ACCESS, ATOMIC_OP, OFFSET_SHIFT])
intrinsic("shared_atomic", src_comp=[1, 1], dest_comp=1, indices=[BASE, ATOMIC_OP])
intrinsic("shared_atomic_nv", src_comp=[1, 1, 1], dest_comp=1, indices=[BASE, ATOMIC_OP, OFFSET_SHIFT_NV])
intrinsic("task_payload_atomic", src_comp=[1, 1], dest_comp=1, indices=[BASE, ATOMIC_OP])
intrinsic("global_atomic", src_comp=[1, 1], dest_comp=1, indices=[ATOMIC_OP])
intrinsic("global_atomic_2x32", src_comp=[2, 1], dest_comp=1, indices=[ATOMIC_OP])
intrinsic("global_atomic_amd", src_comp=[1, 1, 1], dest_comp=1, indices=[BASE, ATOMIC_OP])
intrinsic("global_atomic_agx", src_comp=[1, 1, 1], dest_comp=1, indices=[ATOMIC_OP, SIGN_EXTEND])
intrinsic("global_atomic_nv", src_comp=[1, 1, 1], dest_comp=1, indices=[BASE, ATOMIC_OP])
intrinsic("global_atomic_pco", src_comp=[3], dest_comp=1, indices=[ATOMIC_OP], bit_sizes=[32])
intrinsic("deref_atomic_swap", src_comp=[-1, 1, 1], dest_comp=1, indices=[ACCESS, ATOMIC_OP])
intrinsic("ssbo_atomic_swap", src_comp=[-1, 1, 1, 1], dest_comp=1, indices=[ACCESS, ATOMIC_OP, OFFSET_SHIFT])
intrinsic("shared_atomic_swap", src_comp=[1, 1, 1], dest_comp=1, indices=[BASE, ATOMIC_OP])
intrinsic("shared_atomic_swap_nv", src_comp=[1, 1, 1], dest_comp=1, indices=[BASE, ATOMIC_OP, OFFSET_SHIFT_NV])
intrinsic("task_payload_atomic_swap", src_comp=[1, 1, 1], dest_comp=1, indices=[BASE, ATOMIC_OP])
intrinsic("global_atomic_swap", src_comp=[1, 1, 1], dest_comp=1, indices=[ATOMIC_OP])
intrinsic("global_atomic_swap_2x32", src_comp=[2, 1, 1], dest_comp=1, indices=[ATOMIC_OP])
intrinsic("global_atomic_swap_amd", src_comp=[1, 1, 1, 1], dest_comp=1, indices=[BASE, ATOMIC_OP])
intrinsic("global_atomic_swap_agx", src_comp=[1, 1, 1, 1], dest_comp=1, indices=[ATOMIC_OP, SIGN_EXTEND])
intrinsic("global_atomic_swap_nv", src_comp=[1, 1, 1], dest_comp=1, indices=[BASE, ATOMIC_OP])
intrinsic("global_atomic_swap_pco", src_comp=[4], dest_comp=1, indices=[ATOMIC_OP], bit_sizes=[32])
intrinsic("deref_atomic_swap", src_comp=[-1, 0, 0], dest_comp=0, indices=[ACCESS, ATOMIC_OP])
intrinsic("ssbo_atomic_swap", src_comp=[-1, 1, 0, 0], dest_comp=0, indices=[ACCESS, ATOMIC_OP, OFFSET_SHIFT])
intrinsic("shared_atomic_swap", src_comp=[1, 0, 0], dest_comp=0, indices=[BASE, ATOMIC_OP])
intrinsic("shared_atomic_swap_nv", src_comp=[1, 0, 0], dest_comp=0, indices=[BASE, ATOMIC_OP, OFFSET_SHIFT_NV])
intrinsic("task_payload_atomic_swap", src_comp=[1, 0, 0], dest_comp=0, indices=[BASE, ATOMIC_OP])
intrinsic("global_atomic_swap", src_comp=[1, 0, 0], dest_comp=0, indices=[ATOMIC_OP])
intrinsic("global_atomic_swap_2x32", src_comp=[2, 0, 0], dest_comp=0, indices=[ATOMIC_OP])
intrinsic("global_atomic_swap_amd", src_comp=[1, 1, 0, 0], dest_comp=0, indices=[BASE, ATOMIC_OP])
# As with global_atomic_agx and global_atomic_swap_amd, the two leading
# sources stay scalar; only the two trailing data operands take the
# instruction's component count (src_comp 0).
intrinsic("global_atomic_swap_agx", src_comp=[1, 1, 0, 0], dest_comp=0, indices=[ATOMIC_OP, SIGN_EXTEND])
intrinsic("global_atomic_swap_nv", src_comp=[1, 0, 0], dest_comp=0, indices=[BASE, ATOMIC_OP])
intrinsic("global_atomic_swap_pco", src_comp=[4], dest_comp=0, indices=[ATOMIC_OP], bit_sizes=[32])
def system_value(name, dest_comp, indices=[], bit_sizes=[32], can_reorder=True):
flags = [CAN_ELIMINATE, CAN_REORDER] if can_reorder else [CAN_ELIMINATE]

View file

@ -156,6 +156,9 @@ lower_instr(nir_intrinsic_instr *instr, unsigned ssbo_offset, nir_builder *b, un
* best to take this from the dest:
*/
new_instr->num_components = instr->def.num_components;
} else {
/* This pass doesn't create multi-component SSBO atomics */
new_instr->num_components = 1;
}
nir_def_init(&new_instr->instr, &new_instr->def,

View file

@ -1117,9 +1117,9 @@ build_explicit_io_atomic(nir_builder *b, nir_intrinsic_instr *intrin,
if (addr_shift.shift)
nir_intrinsic_set_offset_shift(atomic, addr_shift.shift);
assert(intrin->def.num_components == 1);
nir_def_init(&atomic->instr, &atomic->def, 1,
intrin->def.bit_size);
atomic->num_components = intrin->def.num_components;
nir_def_init(&atomic->instr, &atomic->def,
intrin->def.num_components, intrin->def.bit_size);
assert(atomic->def.bit_size % 8 == 0);

View file

@ -46,6 +46,7 @@ lower(nir_builder *b, nir_intrinsic_instr *intr, void *data)
b->cursor = nir_before_instr(&intr->instr);
nir_atomic_op atomic_op = nir_intrinsic_atomic_op(intr);
enum pipe_format format = nir_intrinsic_format(intr);
unsigned num_comps = intr->def.num_components;
unsigned bit_size = intr->def.bit_size;
if (state->filter && !state->filter(intr, state->data))
@ -65,7 +66,7 @@ lower(nir_builder *b, nir_intrinsic_instr *intr, void *data)
else
format_type = UTIL_FORMAT_TYPE_UNSIGNED;
format = util_format_get_array(format_type, bit_size, 1, false,
format = util_format_get_array(format_type, bit_size, num_comps, false,
type_ != nir_type_float);
}

View file

@ -4942,6 +4942,7 @@ vtn_handle_atomics(struct vtn_builder *b, SpvOp opcode,
case SpvOpAtomicFAddEXT:
case SpvOpAtomicFMinEXT:
case SpvOpAtomicFMaxEXT:
atomic->num_components = glsl_get_vector_elements(deref_type);
fill_common_atomic_sources(b, opcode, w, &atomic->src[1]);
break;

View file

@ -218,8 +218,13 @@ lower_offset_for_ssbo(nir_intrinsic_instr *intrinsic, nir_builder *b,
nir_intrinsic_copy_const_indices(new_intrinsic, intrinsic);
new_intrinsic->num_components = intrinsic->num_components;
if (ir3_ssbo_opcode == nir_intrinsic_ssbo_atomic_ir3 ||
ir3_ssbo_opcode == nir_intrinsic_ssbo_atomic_swap_ir3) {
assert(intrinsic->num_components == 1);
new_intrinsic->num_components = 0;
} else {
new_intrinsic->num_components = intrinsic->num_components;
}
int cur_shift = nir_intrinsic_offset_shift(intrinsic);
int extra_shift = shift - cur_shift;

View file

@ -2333,13 +2333,16 @@ rewrite_atomic_ssbo_instr(nir_builder *b, nir_instr *instr, struct bo_vars *bo)
for (unsigned i = 0; i < num_components; i++) {
nir_deref_instr *deref_arr = nir_build_deref_array(b, deref_struct, offset);
nir_intrinsic_instr *new_instr = nir_intrinsic_instr_create(b->shader, op);
new_instr->num_components = 1;
nir_def_init(&new_instr->instr, &new_instr->def, 1,
intr->def.bit_size);
nir_intrinsic_set_atomic_op(new_instr, nir_intrinsic_atomic_op(intr));
new_instr->src[0] = nir_src_for_ssa(&deref_arr->def);
/* deref ops have no offset src, so copy the srcs after it */
for (unsigned j = 2; j < nir_intrinsic_infos[intr->intrinsic].num_srcs; j++)
for (unsigned j = 2; j < nir_intrinsic_infos[intr->intrinsic].num_srcs; j++) {
new_instr->src[j - 1] = nir_src_for_ssa(intr->src[j].ssa);
assert(new_instr->src[j - 1].ssa->num_components == 1);
}
nir_builder_instr_insert(b, &new_instr->instr);
result[i] = &new_instr->def;

View file

@ -1219,6 +1219,7 @@ lower_image(nir_builder *b, nir_intrinsic_instr *intr, void *cb_data)
nir_def *atomic_swap = nir_global_atomic_swap_pco(
b,
intr->num_components,
addr_data,
.atomic_op = nir_intrinsic_atomic_op(intr));
nir_def_rewrite_uses(&intr->def, atomic_swap);
@ -1234,6 +1235,7 @@ lower_image(nir_builder *b, nir_intrinsic_instr *intr, void *cb_data)
nir_def *atomic =
nir_global_atomic_pco(b,
intr->num_components,
addr_data,
.atomic_op = nir_intrinsic_atomic_op(intr));
nir_def_rewrite_uses(&intr->def, atomic);

View file

@ -141,7 +141,7 @@ build_mesh_atomic(nir_builder *b, nir_intrinsic_instr *intrin)
nir_if *if_body =
nir_push_if(b, nir_ieq(b, elected_thread, current_invocation));
{
current_value = nir_load_shared(b, 1, intrin->def.bit_size, offset,
current_value = nir_load_shared(b, intrin->def.num_components, intrin->def.bit_size, offset,
.base = nir_intrinsic_base(intrin));
nir_def *new_value = lower_atomic_op(b, intrin, current_value);
nir_store_shared(b, new_value, offset,