diff --git a/src/compiler/glsl/glsl_to_nir.cpp b/src/compiler/glsl/glsl_to_nir.cpp index 4738c76dd6f..0853010bd63 100644 --- a/src/compiler/glsl/glsl_to_nir.cpp +++ b/src/compiler/glsl/glsl_to_nir.cpp @@ -1371,6 +1371,7 @@ nir_visitor::visit(ir_call *ir) /* Atomic result */ assert(ir->return_deref); + instr->num_components = 1; if (glsl_type_is_integer_64(ir->return_deref->type)) { nir_def_init(&instr->instr, &instr->def, ir->return_deref->type->vector_elements, 64); @@ -1438,6 +1439,7 @@ nir_visitor::visit(ir_call *ir) if (op == nir_intrinsic_image_deref_atomic || op == nir_intrinsic_image_deref_atomic_swap) { nir_intrinsic_set_atomic_op(instr, atomic_op); + instr->num_components = 1; } instr->src[0] = nir_src_for_ssa(&deref->def); diff --git a/src/compiler/nir/nir_intrinsics.py b/src/compiler/nir/nir_intrinsics.py index 072e7d215d9..33b90a5dfbc 100644 --- a/src/compiler/nir/nir_intrinsics.py +++ b/src/compiler/nir/nir_intrinsics.py @@ -838,8 +838,8 @@ def image(name, src_comp=[], extra_indices=[], **kwargs): image("load", src_comp=[4, 1, 1], extra_indices=[DEST_TYPE], dest_comp=0, flags=[CAN_ELIMINATE]) image("sparse_load", src_comp=[4, 1, 1], extra_indices=[DEST_TYPE], dest_comp=0, flags=[CAN_ELIMINATE]) image("store", src_comp=[4, 1, 0, 1], extra_indices=[SRC_TYPE]) -image("atomic", src_comp=[4, 1, 1], dest_comp=1, extra_indices=[ATOMIC_OP]) -image("atomic_swap", src_comp=[4, 1, 1, 1], dest_comp=1, extra_indices=[ATOMIC_OP]) +image("atomic", src_comp=[4, 1, 0], dest_comp=0, extra_indices=[ATOMIC_OP]) +image("atomic_swap", src_comp=[4, 1, 0, 0], dest_comp=0, extra_indices=[ATOMIC_OP]) image("size", dest_comp=0, src_comp=[1], flags=[CAN_ELIMINATE, CAN_REORDER]) image("levels", dest_comp=1, flags=[CAN_ELIMINATE, CAN_REORDER]) image("samples", dest_comp=1, flags=[CAN_ELIMINATE, CAN_REORDER]) @@ -943,30 +943,34 @@ intrinsic("load_vulkan_descriptor", src_comp=[-1], dest_comp=0, # PCO global variants use a vec3 for the memory address and data, where component X # has 
the low 32 address bits, component Y has the high 32 address bits, and component Z # has the data parameter. +# +# Note on vector atomics: +# These work per component, not on the whole vector at once. Each component +# is atomic by itself. This means other threads might see some components +# updated while others are still old. +intrinsic("deref_atomic", src_comp=[-1, 0], dest_comp=0, indices=[ACCESS, ATOMIC_OP]) +intrinsic("ssbo_atomic", src_comp=[-1, 1, 0], dest_comp=0, indices=[ACCESS, ATOMIC_OP, OFFSET_SHIFT]) +intrinsic("shared_atomic", src_comp=[1, 0], dest_comp=0, indices=[BASE, ATOMIC_OP]) +intrinsic("shared_atomic_nv", src_comp=[1, 1, 0], dest_comp=0, indices=[BASE, ATOMIC_OP, OFFSET_SHIFT_NV]) +intrinsic("task_payload_atomic", src_comp=[1, 0], dest_comp=0, indices=[BASE, ATOMIC_OP]) +intrinsic("global_atomic", src_comp=[1, 0], dest_comp=0, indices=[ATOMIC_OP]) +intrinsic("global_atomic_2x32", src_comp=[2, 0], dest_comp=0, indices=[ATOMIC_OP]) +intrinsic("global_atomic_amd", src_comp=[1, 1, 0], dest_comp=0, indices=[BASE, ATOMIC_OP]) +intrinsic("global_atomic_agx", src_comp=[1, 1, 0], dest_comp=0, indices=[ATOMIC_OP, SIGN_EXTEND]) +intrinsic("global_atomic_nv", src_comp=[1, 1, 0], dest_comp=0, indices=[BASE, ATOMIC_OP]) +intrinsic("global_atomic_pco", src_comp=[3], dest_comp=0, indices=[ATOMIC_OP], bit_sizes=[32]) -intrinsic("deref_atomic", src_comp=[-1, 1], dest_comp=1, indices=[ACCESS, ATOMIC_OP]) -intrinsic("ssbo_atomic", src_comp=[-1, 1, 1], dest_comp=1, indices=[ACCESS, ATOMIC_OP, OFFSET_SHIFT]) -intrinsic("shared_atomic", src_comp=[1, 1], dest_comp=1, indices=[BASE, ATOMIC_OP]) -intrinsic("shared_atomic_nv", src_comp=[1, 1, 1], dest_comp=1, indices=[BASE, ATOMIC_OP, OFFSET_SHIFT_NV]) -intrinsic("task_payload_atomic", src_comp=[1, 1], dest_comp=1, indices=[BASE, ATOMIC_OP]) -intrinsic("global_atomic", src_comp=[1, 1], dest_comp=1, indices=[ATOMIC_OP]) -intrinsic("global_atomic_2x32", src_comp=[2, 1], dest_comp=1, indices=[ATOMIC_OP]) 
-intrinsic("global_atomic_amd", src_comp=[1, 1, 1], dest_comp=1, indices=[BASE, ATOMIC_OP]) -intrinsic("global_atomic_agx", src_comp=[1, 1, 1], dest_comp=1, indices=[ATOMIC_OP, SIGN_EXTEND]) -intrinsic("global_atomic_nv", src_comp=[1, 1, 1], dest_comp=1, indices=[BASE, ATOMIC_OP]) -intrinsic("global_atomic_pco", src_comp=[3], dest_comp=1, indices=[ATOMIC_OP], bit_sizes=[32]) - -intrinsic("deref_atomic_swap", src_comp=[-1, 1, 1], dest_comp=1, indices=[ACCESS, ATOMIC_OP]) -intrinsic("ssbo_atomic_swap", src_comp=[-1, 1, 1, 1], dest_comp=1, indices=[ACCESS, ATOMIC_OP, OFFSET_SHIFT]) -intrinsic("shared_atomic_swap", src_comp=[1, 1, 1], dest_comp=1, indices=[BASE, ATOMIC_OP]) -intrinsic("shared_atomic_swap_nv", src_comp=[1, 1, 1], dest_comp=1, indices=[BASE, ATOMIC_OP, OFFSET_SHIFT_NV]) -intrinsic("task_payload_atomic_swap", src_comp=[1, 1, 1], dest_comp=1, indices=[BASE, ATOMIC_OP]) -intrinsic("global_atomic_swap", src_comp=[1, 1, 1], dest_comp=1, indices=[ATOMIC_OP]) -intrinsic("global_atomic_swap_2x32", src_comp=[2, 1, 1], dest_comp=1, indices=[ATOMIC_OP]) -intrinsic("global_atomic_swap_amd", src_comp=[1, 1, 1, 1], dest_comp=1, indices=[BASE, ATOMIC_OP]) -intrinsic("global_atomic_swap_agx", src_comp=[1, 1, 1, 1], dest_comp=1, indices=[ATOMIC_OP, SIGN_EXTEND]) -intrinsic("global_atomic_swap_nv", src_comp=[1, 1, 1], dest_comp=1, indices=[BASE, ATOMIC_OP]) -intrinsic("global_atomic_swap_pco", src_comp=[4], dest_comp=1, indices=[ATOMIC_OP], bit_sizes=[32]) +intrinsic("deref_atomic_swap", src_comp=[-1, 0, 0], dest_comp=0, indices=[ACCESS, ATOMIC_OP]) +intrinsic("ssbo_atomic_swap", src_comp=[-1, 1, 0, 0], dest_comp=0, indices=[ACCESS, ATOMIC_OP, OFFSET_SHIFT]) +intrinsic("shared_atomic_swap", src_comp=[1, 0, 0], dest_comp=0, indices=[BASE, ATOMIC_OP]) +intrinsic("shared_atomic_swap_nv", src_comp=[1, 0, 0], dest_comp=0, indices=[BASE, ATOMIC_OP, OFFSET_SHIFT_NV]) +intrinsic("task_payload_atomic_swap", src_comp=[1, 0, 0], dest_comp=0, indices=[BASE, ATOMIC_OP]) 
+intrinsic("global_atomic_swap", src_comp=[1, 0, 0], dest_comp=0, indices=[ATOMIC_OP]) +intrinsic("global_atomic_swap_2x32", src_comp=[2, 0, 0], dest_comp=0, indices=[ATOMIC_OP]) +intrinsic("global_atomic_swap_amd", src_comp=[1, 1, 0, 0], dest_comp=0, indices=[BASE, ATOMIC_OP]) +intrinsic("global_atomic_swap_agx", src_comp=[1, 1, 0, 0], dest_comp=0, indices=[ATOMIC_OP, SIGN_EXTEND]) +intrinsic("global_atomic_swap_nv", src_comp=[1, 0, 0], dest_comp=0, indices=[BASE, ATOMIC_OP]) +intrinsic("global_atomic_swap_pco", src_comp=[4], dest_comp=0, indices=[ATOMIC_OP], bit_sizes=[32]) def system_value(name, dest_comp, indices=[], bit_sizes=[32], can_reorder=True): flags = [CAN_ELIMINATE, CAN_REORDER] if can_reorder else [CAN_ELIMINATE] diff --git a/src/compiler/nir/nir_lower_atomics_to_ssbo.c b/src/compiler/nir/nir_lower_atomics_to_ssbo.c index 610ebf80fe4..2282d6f2b52 100644 --- a/src/compiler/nir/nir_lower_atomics_to_ssbo.c +++ b/src/compiler/nir/nir_lower_atomics_to_ssbo.c @@ -156,6 +156,9 @@ lower_instr(nir_intrinsic_instr *instr, unsigned ssbo_offset, nir_builder *b, un * best to take this from the dest: */ new_instr->num_components = instr->def.num_components; + } else { + /* This pass doesn't create multi-component SSBO atomics */ + new_instr->num_components = 1; } nir_def_init(&new_instr->instr, &new_instr->def, diff --git a/src/compiler/nir/nir_lower_explicit_io.c b/src/compiler/nir/nir_lower_explicit_io.c index 8e39751567e..b0a311fbc9a 100644 --- a/src/compiler/nir/nir_lower_explicit_io.c +++ b/src/compiler/nir/nir_lower_explicit_io.c @@ -1117,9 +1117,9 @@ build_explicit_io_atomic(nir_builder *b, nir_intrinsic_instr *intrin, if (addr_shift.shift) nir_intrinsic_set_offset_shift(atomic, addr_shift.shift); - assert(intrin->def.num_components == 1); - nir_def_init(&atomic->instr, &atomic->def, 1, - intrin->def.bit_size); + atomic->num_components = intrin->def.num_components; + nir_def_init(&atomic->instr, &atomic->def, + intrin->def.num_components, 
intrin->def.bit_size); assert(atomic->def.bit_size % 8 == 0); diff --git a/src/compiler/nir/nir_lower_image_atomics_to_global.c b/src/compiler/nir/nir_lower_image_atomics_to_global.c index 3c0b2099ba2..02d21b5f636 100644 --- a/src/compiler/nir/nir_lower_image_atomics_to_global.c +++ b/src/compiler/nir/nir_lower_image_atomics_to_global.c @@ -46,6 +46,7 @@ lower(nir_builder *b, nir_intrinsic_instr *intr, void *data) b->cursor = nir_before_instr(&intr->instr); nir_atomic_op atomic_op = nir_intrinsic_atomic_op(intr); enum pipe_format format = nir_intrinsic_format(intr); + unsigned num_comps = intr->def.num_components; unsigned bit_size = intr->def.bit_size; if (state->filter && !state->filter(intr, state->data)) @@ -65,7 +66,7 @@ lower(nir_builder *b, nir_intrinsic_instr *intr, void *data) else format_type = UTIL_FORMAT_TYPE_UNSIGNED; - format = util_format_get_array(format_type, bit_size, 1, false, + format = util_format_get_array(format_type, bit_size, num_comps, false, type_ != nir_type_float); } diff --git a/src/compiler/spirv/spirv_to_nir.c b/src/compiler/spirv/spirv_to_nir.c index 0cba281a3fc..b006996ce02 100644 --- a/src/compiler/spirv/spirv_to_nir.c +++ b/src/compiler/spirv/spirv_to_nir.c @@ -4942,6 +4942,7 @@ vtn_handle_atomics(struct vtn_builder *b, SpvOp opcode, case SpvOpAtomicFAddEXT: case SpvOpAtomicFMinEXT: case SpvOpAtomicFMaxEXT: + atomic->num_components = glsl_get_vector_elements(deref_type); fill_common_atomic_sources(b, opcode, w, &atomic->src[1]); break; diff --git a/src/freedreno/ir3/ir3_nir_lower_io_offsets.c b/src/freedreno/ir3/ir3_nir_lower_io_offsets.c index a4f542f4302..f5453ced62a 100644 --- a/src/freedreno/ir3/ir3_nir_lower_io_offsets.c +++ b/src/freedreno/ir3/ir3_nir_lower_io_offsets.c @@ -218,8 +218,13 @@ lower_offset_for_ssbo(nir_intrinsic_instr *intrinsic, nir_builder *b, nir_intrinsic_copy_const_indices(new_intrinsic, intrinsic); - new_intrinsic->num_components = intrinsic->num_components; - + if (ir3_ssbo_opcode == 
nir_intrinsic_ssbo_atomic_ir3 || + ir3_ssbo_opcode == nir_intrinsic_ssbo_atomic_swap_ir3) { + assert(intrinsic->num_components == 1); + new_intrinsic->num_components = 0; + } else { + new_intrinsic->num_components = intrinsic->num_components; + } int cur_shift = nir_intrinsic_offset_shift(intrinsic); int extra_shift = shift - cur_shift; diff --git a/src/gallium/drivers/zink/zink_compiler.c b/src/gallium/drivers/zink/zink_compiler.c index 3311c573095..76326b19b2f 100644 --- a/src/gallium/drivers/zink/zink_compiler.c +++ b/src/gallium/drivers/zink/zink_compiler.c @@ -2333,13 +2333,16 @@ rewrite_atomic_ssbo_instr(nir_builder *b, nir_instr *instr, struct bo_vars *bo) for (unsigned i = 0; i < num_components; i++) { nir_deref_instr *deref_arr = nir_build_deref_array(b, deref_struct, offset); nir_intrinsic_instr *new_instr = nir_intrinsic_instr_create(b->shader, op); + new_instr->num_components = 1; nir_def_init(&new_instr->instr, &new_instr->def, 1, intr->def.bit_size); nir_intrinsic_set_atomic_op(new_instr, nir_intrinsic_atomic_op(intr)); new_instr->src[0] = nir_src_for_ssa(&deref_arr->def); /* deref ops have no offset src, so copy the srcs after it */ - for (unsigned j = 2; j < nir_intrinsic_infos[intr->intrinsic].num_srcs; j++) + for (unsigned j = 2; j < nir_intrinsic_infos[intr->intrinsic].num_srcs; j++) { new_instr->src[j - 1] = nir_src_for_ssa(intr->src[j].ssa); + assert(new_instr->src[j - 1].ssa->num_components == 1); + } nir_builder_instr_insert(b, &new_instr->instr); result[i] = &new_instr->def; diff --git a/src/imagination/pco/pco_nir_tex.c b/src/imagination/pco/pco_nir_tex.c index a902d3943b6..1780ffc5427 100644 --- a/src/imagination/pco/pco_nir_tex.c +++ b/src/imagination/pco/pco_nir_tex.c @@ -1219,6 +1219,7 @@ lower_image(nir_builder *b, nir_intrinsic_instr *intr, void *cb_data) nir_def *atomic_swap = nir_global_atomic_swap_pco( b, + intr->num_components, addr_data, .atomic_op = nir_intrinsic_atomic_op(intr)); nir_def_rewrite_uses(&intr->def, atomic_swap); 
@@ -1234,6 +1235,7 @@ lower_image(nir_builder *b, nir_intrinsic_instr *intr, void *cb_data) nir_def *atomic = nir_global_atomic_pco(b, + intr->num_components, addr_data, .atomic_op = nir_intrinsic_atomic_op(intr)); nir_def_rewrite_uses(&intr->def, atomic); diff --git a/src/nouveau/compiler/nak_nir_lower_shared_atomics.c b/src/nouveau/compiler/nak_nir_lower_shared_atomics.c index ba8a799d6dc..749729e826c 100644 --- a/src/nouveau/compiler/nak_nir_lower_shared_atomics.c +++ b/src/nouveau/compiler/nak_nir_lower_shared_atomics.c @@ -141,7 +141,7 @@ build_mesh_atomic(nir_builder *b, nir_intrinsic_instr *intrin) nir_if *if_body = nir_push_if(b, nir_ieq(b, elected_thread, current_invocation)); { - current_value = nir_load_shared(b, 1, intrin->def.bit_size, offset, + current_value = nir_load_shared(b, intrin->def.num_components, intrin->def.bit_size, offset, .base = nir_intrinsic_base(intrin)); nir_def *new_value = lower_atomic_op(b, intrin, current_value); nir_store_shared(b, new_value, offset,