mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-06-18 15:08:24 +02:00
nir: Allow atomic intrinsics to have multiple components
v2 (Sid): Handle image and ssbo atomics having only one component in
ir3, glsl, pco, and zink
Co-authored-by: Sid Pranjale <sidpranjale127@protonmail.com>
Reviewed-by: Mel Henning <mhenning@darkrefraction.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/37888>
This commit is contained in:
parent
28aba5fba4
commit
c6a98cb2d0
10 changed files with 54 additions and 33 deletions
|
|
@ -1371,6 +1371,7 @@ nir_visitor::visit(ir_call *ir)
|
|||
|
||||
/* Atomic result */
|
||||
assert(ir->return_deref);
|
||||
instr->num_components = 1;
|
||||
if (glsl_type_is_integer_64(ir->return_deref->type)) {
|
||||
nir_def_init(&instr->instr, &instr->def,
|
||||
ir->return_deref->type->vector_elements, 64);
|
||||
|
|
@ -1438,6 +1439,7 @@ nir_visitor::visit(ir_call *ir)
|
|||
if (op == nir_intrinsic_image_deref_atomic ||
|
||||
op == nir_intrinsic_image_deref_atomic_swap) {
|
||||
nir_intrinsic_set_atomic_op(instr, atomic_op);
|
||||
instr->num_components = 1;
|
||||
}
|
||||
|
||||
instr->src[0] = nir_src_for_ssa(&deref->def);
|
||||
|
|
|
|||
|
|
@ -838,8 +838,8 @@ def image(name, src_comp=[], extra_indices=[], **kwargs):
|
|||
image("load", src_comp=[4, 1, 1], extra_indices=[DEST_TYPE], dest_comp=0, flags=[CAN_ELIMINATE])
|
||||
image("sparse_load", src_comp=[4, 1, 1], extra_indices=[DEST_TYPE], dest_comp=0, flags=[CAN_ELIMINATE])
|
||||
image("store", src_comp=[4, 1, 0, 1], extra_indices=[SRC_TYPE])
|
||||
image("atomic", src_comp=[4, 1, 1], dest_comp=1, extra_indices=[ATOMIC_OP])
|
||||
image("atomic_swap", src_comp=[4, 1, 1, 1], dest_comp=1, extra_indices=[ATOMIC_OP])
|
||||
image("atomic", src_comp=[4, 1, 0], dest_comp=0, extra_indices=[ATOMIC_OP])
|
||||
image("atomic_swap", src_comp=[4, 1, 0, 0], dest_comp=0, extra_indices=[ATOMIC_OP])
|
||||
image("size", dest_comp=0, src_comp=[1], flags=[CAN_ELIMINATE, CAN_REORDER])
|
||||
image("levels", dest_comp=1, flags=[CAN_ELIMINATE, CAN_REORDER])
|
||||
image("samples", dest_comp=1, flags=[CAN_ELIMINATE, CAN_REORDER])
|
||||
|
|
@ -943,30 +943,34 @@ intrinsic("load_vulkan_descriptor", src_comp=[-1], dest_comp=0,
|
|||
# PCO global variants use a vec3 for the memory address and data, where component X
|
||||
# has the low 32 address bits, component Y has the high 32 address bits, and component Z
|
||||
# has the data parameter.
|
||||
#
|
||||
# Note on vector atomics:
|
||||
# These work per component, not on the whole vector at once. Each component
|
||||
# is atomic by itself. This means other threads might see some components
|
||||
# updated while others still hold their previous values.
|
||||
intrinsic("deref_atomic", src_comp=[-1, 0], dest_comp=0, indices=[ACCESS, ATOMIC_OP])
|
||||
intrinsic("ssbo_atomic", src_comp=[-1, 1, 0], dest_comp=0, indices=[ACCESS, ATOMIC_OP, OFFSET_SHIFT])
|
||||
intrinsic("shared_atomic", src_comp=[1, 0], dest_comp=0, indices=[BASE, ATOMIC_OP])
|
||||
intrinsic("shared_atomic_nv", src_comp=[1, 1, 0], dest_comp=0, indices=[BASE, ATOMIC_OP, OFFSET_SHIFT_NV])
|
||||
intrinsic("task_payload_atomic", src_comp=[1, 0], dest_comp=0, indices=[BASE, ATOMIC_OP])
|
||||
intrinsic("global_atomic", src_comp=[1, 0], dest_comp=0, indices=[ATOMIC_OP])
|
||||
intrinsic("global_atomic_2x32", src_comp=[2, 0], dest_comp=0, indices=[ATOMIC_OP])
|
||||
intrinsic("global_atomic_amd", src_comp=[1, 1, 0], dest_comp=0, indices=[BASE, ATOMIC_OP])
|
||||
intrinsic("global_atomic_agx", src_comp=[1, 1, 0], dest_comp=0, indices=[ATOMIC_OP, SIGN_EXTEND])
|
||||
intrinsic("global_atomic_nv", src_comp=[1, 1, 0], dest_comp=0, indices=[BASE, ATOMIC_OP])
|
||||
intrinsic("global_atomic_pco", src_comp=[3], dest_comp=0, indices=[ATOMIC_OP], bit_sizes=[32])
|
||||
|
||||
intrinsic("deref_atomic", src_comp=[-1, 1], dest_comp=1, indices=[ACCESS, ATOMIC_OP])
|
||||
intrinsic("ssbo_atomic", src_comp=[-1, 1, 1], dest_comp=1, indices=[ACCESS, ATOMIC_OP, OFFSET_SHIFT])
|
||||
intrinsic("shared_atomic", src_comp=[1, 1], dest_comp=1, indices=[BASE, ATOMIC_OP])
|
||||
intrinsic("shared_atomic_nv", src_comp=[1, 1, 1], dest_comp=1, indices=[BASE, ATOMIC_OP, OFFSET_SHIFT_NV])
|
||||
intrinsic("task_payload_atomic", src_comp=[1, 1], dest_comp=1, indices=[BASE, ATOMIC_OP])
|
||||
intrinsic("global_atomic", src_comp=[1, 1], dest_comp=1, indices=[ATOMIC_OP])
|
||||
intrinsic("global_atomic_2x32", src_comp=[2, 1], dest_comp=1, indices=[ATOMIC_OP])
|
||||
intrinsic("global_atomic_amd", src_comp=[1, 1, 1], dest_comp=1, indices=[BASE, ATOMIC_OP])
|
||||
intrinsic("global_atomic_agx", src_comp=[1, 1, 1], dest_comp=1, indices=[ATOMIC_OP, SIGN_EXTEND])
|
||||
intrinsic("global_atomic_nv", src_comp=[1, 1, 1], dest_comp=1, indices=[BASE, ATOMIC_OP])
|
||||
intrinsic("global_atomic_pco", src_comp=[3], dest_comp=1, indices=[ATOMIC_OP], bit_sizes=[32])
|
||||
|
||||
intrinsic("deref_atomic_swap", src_comp=[-1, 1, 1], dest_comp=1, indices=[ACCESS, ATOMIC_OP])
|
||||
intrinsic("ssbo_atomic_swap", src_comp=[-1, 1, 1, 1], dest_comp=1, indices=[ACCESS, ATOMIC_OP, OFFSET_SHIFT])
|
||||
intrinsic("shared_atomic_swap", src_comp=[1, 1, 1], dest_comp=1, indices=[BASE, ATOMIC_OP])
|
||||
intrinsic("shared_atomic_swap_nv", src_comp=[1, 1, 1], dest_comp=1, indices=[BASE, ATOMIC_OP, OFFSET_SHIFT_NV])
|
||||
intrinsic("task_payload_atomic_swap", src_comp=[1, 1, 1], dest_comp=1, indices=[BASE, ATOMIC_OP])
|
||||
intrinsic("global_atomic_swap", src_comp=[1, 1, 1], dest_comp=1, indices=[ATOMIC_OP])
|
||||
intrinsic("global_atomic_swap_2x32", src_comp=[2, 1, 1], dest_comp=1, indices=[ATOMIC_OP])
|
||||
intrinsic("global_atomic_swap_amd", src_comp=[1, 1, 1, 1], dest_comp=1, indices=[BASE, ATOMIC_OP])
|
||||
intrinsic("global_atomic_swap_agx", src_comp=[1, 1, 1, 1], dest_comp=1, indices=[ATOMIC_OP, SIGN_EXTEND])
|
||||
intrinsic("global_atomic_swap_nv", src_comp=[1, 1, 1], dest_comp=1, indices=[BASE, ATOMIC_OP])
|
||||
intrinsic("global_atomic_swap_pco", src_comp=[4], dest_comp=1, indices=[ATOMIC_OP], bit_sizes=[32])
|
||||
intrinsic("deref_atomic_swap", src_comp=[-1, 0, 0], dest_comp=0, indices=[ACCESS, ATOMIC_OP])
|
||||
intrinsic("ssbo_atomic_swap", src_comp=[-1, 1, 0, 0], dest_comp=0, indices=[ACCESS, ATOMIC_OP, OFFSET_SHIFT])
|
||||
intrinsic("shared_atomic_swap", src_comp=[1, 0, 0], dest_comp=0, indices=[BASE, ATOMIC_OP])
|
||||
intrinsic("shared_atomic_swap_nv", src_comp=[1, 0, 0], dest_comp=0, indices=[BASE, ATOMIC_OP, OFFSET_SHIFT_NV])
|
||||
intrinsic("task_payload_atomic_swap", src_comp=[1, 0, 0], dest_comp=0, indices=[BASE, ATOMIC_OP])
|
||||
intrinsic("global_atomic_swap", src_comp=[1, 0, 0], dest_comp=0, indices=[ATOMIC_OP])
|
||||
intrinsic("global_atomic_swap_2x32", src_comp=[2, 0, 0], dest_comp=0, indices=[ATOMIC_OP])
|
||||
intrinsic("global_atomic_swap_amd", src_comp=[1, 1, 0, 0], dest_comp=0, indices=[BASE, ATOMIC_OP])
|
||||
intrinsic("global_atomic_swap_agx", src_comp=[1, 0, 0, 1], dest_comp=0, indices=[ATOMIC_OP, SIGN_EXTEND])
|
||||
intrinsic("global_atomic_swap_nv", src_comp=[1, 0, 0], dest_comp=0, indices=[BASE, ATOMIC_OP])
|
||||
intrinsic("global_atomic_swap_pco", src_comp=[4], dest_comp=0, indices=[ATOMIC_OP], bit_sizes=[32])
|
||||
|
||||
def system_value(name, dest_comp, indices=[], bit_sizes=[32], can_reorder=True):
|
||||
flags = [CAN_ELIMINATE, CAN_REORDER] if can_reorder else [CAN_ELIMINATE]
|
||||
|
|
|
|||
|
|
@ -156,6 +156,9 @@ lower_instr(nir_intrinsic_instr *instr, unsigned ssbo_offset, nir_builder *b, un
|
|||
* best to take this from the dest:
|
||||
*/
|
||||
new_instr->num_components = instr->def.num_components;
|
||||
} else {
|
||||
/* This pass doesn't create multi-component SSBO atomics */
|
||||
new_instr->num_components = 1;
|
||||
}
|
||||
|
||||
nir_def_init(&new_instr->instr, &new_instr->def,
|
||||
|
|
|
|||
|
|
@ -1117,9 +1117,9 @@ build_explicit_io_atomic(nir_builder *b, nir_intrinsic_instr *intrin,
|
|||
if (addr_shift.shift)
|
||||
nir_intrinsic_set_offset_shift(atomic, addr_shift.shift);
|
||||
|
||||
assert(intrin->def.num_components == 1);
|
||||
nir_def_init(&atomic->instr, &atomic->def, 1,
|
||||
intrin->def.bit_size);
|
||||
atomic->num_components = intrin->def.num_components;
|
||||
nir_def_init(&atomic->instr, &atomic->def,
|
||||
intrin->def.num_components, intrin->def.bit_size);
|
||||
|
||||
assert(atomic->def.bit_size % 8 == 0);
|
||||
|
||||
|
|
|
|||
|
|
@ -46,6 +46,7 @@ lower(nir_builder *b, nir_intrinsic_instr *intr, void *data)
|
|||
b->cursor = nir_before_instr(&intr->instr);
|
||||
nir_atomic_op atomic_op = nir_intrinsic_atomic_op(intr);
|
||||
enum pipe_format format = nir_intrinsic_format(intr);
|
||||
unsigned num_comps = intr->def.num_components;
|
||||
unsigned bit_size = intr->def.bit_size;
|
||||
|
||||
if (state->filter && !state->filter(intr, state->data))
|
||||
|
|
@ -65,7 +66,7 @@ lower(nir_builder *b, nir_intrinsic_instr *intr, void *data)
|
|||
else
|
||||
format_type = UTIL_FORMAT_TYPE_UNSIGNED;
|
||||
|
||||
format = util_format_get_array(format_type, bit_size, 1, false,
|
||||
format = util_format_get_array(format_type, bit_size, num_comps, false,
|
||||
type_ != nir_type_float);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -4942,6 +4942,7 @@ vtn_handle_atomics(struct vtn_builder *b, SpvOp opcode,
|
|||
case SpvOpAtomicFAddEXT:
|
||||
case SpvOpAtomicFMinEXT:
|
||||
case SpvOpAtomicFMaxEXT:
|
||||
atomic->num_components = glsl_get_vector_elements(deref_type);
|
||||
fill_common_atomic_sources(b, opcode, w, &atomic->src[1]);
|
||||
break;
|
||||
|
||||
|
|
|
|||
|
|
@ -218,8 +218,13 @@ lower_offset_for_ssbo(nir_intrinsic_instr *intrinsic, nir_builder *b,
|
|||
|
||||
nir_intrinsic_copy_const_indices(new_intrinsic, intrinsic);
|
||||
|
||||
new_intrinsic->num_components = intrinsic->num_components;
|
||||
|
||||
if (ir3_ssbo_opcode == nir_intrinsic_ssbo_atomic_ir3 ||
|
||||
ir3_ssbo_opcode == nir_intrinsic_ssbo_atomic_swap_ir3) {
|
||||
assert(intrinsic->num_components == 1);
|
||||
new_intrinsic->num_components = 0;
|
||||
} else {
|
||||
new_intrinsic->num_components = intrinsic->num_components;
|
||||
}
|
||||
int cur_shift = nir_intrinsic_offset_shift(intrinsic);
|
||||
int extra_shift = shift - cur_shift;
|
||||
|
||||
|
|
|
|||
|
|
@ -2333,13 +2333,16 @@ rewrite_atomic_ssbo_instr(nir_builder *b, nir_instr *instr, struct bo_vars *bo)
|
|||
for (unsigned i = 0; i < num_components; i++) {
|
||||
nir_deref_instr *deref_arr = nir_build_deref_array(b, deref_struct, offset);
|
||||
nir_intrinsic_instr *new_instr = nir_intrinsic_instr_create(b->shader, op);
|
||||
new_instr->num_components = 1;
|
||||
nir_def_init(&new_instr->instr, &new_instr->def, 1,
|
||||
intr->def.bit_size);
|
||||
nir_intrinsic_set_atomic_op(new_instr, nir_intrinsic_atomic_op(intr));
|
||||
new_instr->src[0] = nir_src_for_ssa(&deref_arr->def);
|
||||
/* deref ops have no offset src, so copy the srcs after it */
|
||||
for (unsigned j = 2; j < nir_intrinsic_infos[intr->intrinsic].num_srcs; j++)
|
||||
for (unsigned j = 2; j < nir_intrinsic_infos[intr->intrinsic].num_srcs; j++) {
|
||||
new_instr->src[j - 1] = nir_src_for_ssa(intr->src[j].ssa);
|
||||
assert(new_instr->src[j - 1].ssa->num_components == 1);
|
||||
}
|
||||
nir_builder_instr_insert(b, &new_instr->instr);
|
||||
|
||||
result[i] = &new_instr->def;
|
||||
|
|
|
|||
|
|
@ -1219,6 +1219,7 @@ lower_image(nir_builder *b, nir_intrinsic_instr *intr, void *cb_data)
|
|||
|
||||
nir_def *atomic_swap = nir_global_atomic_swap_pco(
|
||||
b,
|
||||
intr->num_components,
|
||||
addr_data,
|
||||
.atomic_op = nir_intrinsic_atomic_op(intr));
|
||||
nir_def_rewrite_uses(&intr->def, atomic_swap);
|
||||
|
|
@ -1234,6 +1235,7 @@ lower_image(nir_builder *b, nir_intrinsic_instr *intr, void *cb_data)
|
|||
|
||||
nir_def *atomic =
|
||||
nir_global_atomic_pco(b,
|
||||
intr->num_components,
|
||||
addr_data,
|
||||
.atomic_op = nir_intrinsic_atomic_op(intr));
|
||||
nir_def_rewrite_uses(&intr->def, atomic);
|
||||
|
|
|
|||
|
|
@ -141,7 +141,7 @@ build_mesh_atomic(nir_builder *b, nir_intrinsic_instr *intrin)
|
|||
nir_if *if_body =
|
||||
nir_push_if(b, nir_ieq(b, elected_thread, current_invocation));
|
||||
{
|
||||
current_value = nir_load_shared(b, 1, intrin->def.bit_size, offset,
|
||||
current_value = nir_load_shared(b, intrin->def.num_components, intrin->def.bit_size, offset,
|
||||
.base = nir_intrinsic_base(intrin));
|
||||
nir_def *new_value = lower_atomic_op(b, intrin, current_value);
|
||||
nir_store_shared(b, new_value, offset,
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue