mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-25 00:00:11 +01:00
pan/bi: Use unified atomics
Signed-off-by: Alyssa Rosenzweig <alyssa@rosenzweig.io> Reviewed-by: Jesse Natalie <jenatali@microsoft.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/22914>
This commit is contained in:
parent
f5d73a9989
commit
e258083e07
2 changed files with 67 additions and 120 deletions
|
|
@ -63,14 +63,8 @@ bi_lower_divergent_indirects_impl(nir_builder *b, nir_instr *instr, void *data)
|
|||
offset = nir_get_io_offset_src(intr);
|
||||
break;
|
||||
|
||||
case nir_intrinsic_image_atomic_add:
|
||||
case nir_intrinsic_image_atomic_imin:
|
||||
case nir_intrinsic_image_atomic_umin:
|
||||
case nir_intrinsic_image_atomic_imax:
|
||||
case nir_intrinsic_image_atomic_umax:
|
||||
case nir_intrinsic_image_atomic_and:
|
||||
case nir_intrinsic_image_atomic_or:
|
||||
case nir_intrinsic_image_atomic_xor:
|
||||
case nir_intrinsic_image_atomic:
|
||||
case nir_intrinsic_image_atomic_swap:
|
||||
case nir_intrinsic_image_load:
|
||||
case nir_intrinsic_image_store:
|
||||
/* Any image access */
|
||||
|
|
|
|||
|
|
@ -1219,55 +1219,22 @@ bi_emit_acmpxchg_to(bi_builder *b, bi_index dst, bi_index addr, nir_src *arg_1,
|
|||
bi_make_vec_to(b, dst, inout_words, NULL, sz / 32, 32);
|
||||
}
|
||||
|
||||
/* Extracts an atomic opcode */
|
||||
|
||||
static enum bi_atom_opc
|
||||
bi_atom_opc_for_nir(nir_intrinsic_op op)
|
||||
bi_atom_opc_for_nir(nir_atomic_op op)
|
||||
{
|
||||
/* clang-format off */
|
||||
switch (op) {
|
||||
case nir_intrinsic_global_atomic_add:
|
||||
case nir_intrinsic_shared_atomic_add:
|
||||
case nir_intrinsic_image_atomic_add:
|
||||
return BI_ATOM_OPC_AADD;
|
||||
|
||||
case nir_intrinsic_global_atomic_imin:
|
||||
case nir_intrinsic_shared_atomic_imin:
|
||||
case nir_intrinsic_image_atomic_imin:
|
||||
return BI_ATOM_OPC_ASMIN;
|
||||
|
||||
case nir_intrinsic_global_atomic_umin:
|
||||
case nir_intrinsic_shared_atomic_umin:
|
||||
case nir_intrinsic_image_atomic_umin:
|
||||
return BI_ATOM_OPC_AUMIN;
|
||||
|
||||
case nir_intrinsic_global_atomic_imax:
|
||||
case nir_intrinsic_shared_atomic_imax:
|
||||
case nir_intrinsic_image_atomic_imax:
|
||||
return BI_ATOM_OPC_ASMAX;
|
||||
|
||||
case nir_intrinsic_global_atomic_umax:
|
||||
case nir_intrinsic_shared_atomic_umax:
|
||||
case nir_intrinsic_image_atomic_umax:
|
||||
return BI_ATOM_OPC_AUMAX;
|
||||
|
||||
case nir_intrinsic_global_atomic_and:
|
||||
case nir_intrinsic_shared_atomic_and:
|
||||
case nir_intrinsic_image_atomic_and:
|
||||
return BI_ATOM_OPC_AAND;
|
||||
|
||||
case nir_intrinsic_global_atomic_or:
|
||||
case nir_intrinsic_shared_atomic_or:
|
||||
case nir_intrinsic_image_atomic_or:
|
||||
return BI_ATOM_OPC_AOR;
|
||||
|
||||
case nir_intrinsic_global_atomic_xor:
|
||||
case nir_intrinsic_shared_atomic_xor:
|
||||
case nir_intrinsic_image_atomic_xor:
|
||||
return BI_ATOM_OPC_AXOR;
|
||||
|
||||
default:
|
||||
unreachable("Unexpected computational atomic");
|
||||
case nir_atomic_op_iadd: return BI_ATOM_OPC_AADD;
|
||||
case nir_atomic_op_imin: return BI_ATOM_OPC_ASMIN;
|
||||
case nir_atomic_op_umin: return BI_ATOM_OPC_AUMIN;
|
||||
case nir_atomic_op_imax: return BI_ATOM_OPC_ASMAX;
|
||||
case nir_atomic_op_umax: return BI_ATOM_OPC_AUMAX;
|
||||
case nir_atomic_op_iand: return BI_ATOM_OPC_AAND;
|
||||
case nir_atomic_op_ior: return BI_ATOM_OPC_AOR;
|
||||
case nir_atomic_op_ixor: return BI_ATOM_OPC_AXOR;
|
||||
default: unreachable("Unexpected computational atomic");
|
||||
}
|
||||
/* clang-format on */
|
||||
}
|
||||
|
||||
/* Optimized unary atomics are available with an implied #1 argument */
|
||||
|
|
@ -1455,9 +1422,9 @@ bi_emit_image_store(bi_builder *b, nir_intrinsic_instr *instr)
|
|||
|
||||
static void
|
||||
bi_emit_atomic_i32_to(bi_builder *b, bi_index dst, bi_index addr, bi_index arg,
|
||||
nir_intrinsic_op intrinsic)
|
||||
nir_atomic_op op)
|
||||
{
|
||||
enum bi_atom_opc opc = bi_atom_opc_for_nir(intrinsic);
|
||||
enum bi_atom_opc opc = bi_atom_opc_for_nir(op);
|
||||
enum bi_atom_opc post_opc = opc;
|
||||
bool bifrost = b->shader->arch <= 8;
|
||||
|
||||
|
|
@ -1616,63 +1583,66 @@ bi_emit_intrinsic(bi_builder *b, nir_intrinsic_instr *instr)
|
|||
*/
|
||||
break;
|
||||
|
||||
case nir_intrinsic_shared_atomic_add:
|
||||
case nir_intrinsic_shared_atomic_imin:
|
||||
case nir_intrinsic_shared_atomic_umin:
|
||||
case nir_intrinsic_shared_atomic_imax:
|
||||
case nir_intrinsic_shared_atomic_umax:
|
||||
case nir_intrinsic_shared_atomic_and:
|
||||
case nir_intrinsic_shared_atomic_or:
|
||||
case nir_intrinsic_shared_atomic_xor: {
|
||||
assert(nir_src_bit_size(instr->src[1]) == 32);
|
||||
case nir_intrinsic_shared_atomic: {
|
||||
nir_atomic_op op = nir_intrinsic_atomic_op(instr);
|
||||
|
||||
bi_index addr = bi_src_index(&instr->src[0]);
|
||||
bi_index addr_hi;
|
||||
|
||||
if (b->shader->arch >= 9) {
|
||||
bi_handle_segment(b, &addr, &addr_hi, BI_SEG_WLS, NULL);
|
||||
addr = bi_collect_v2i32(b, addr, addr_hi);
|
||||
if (op == nir_atomic_op_xchg) {
|
||||
bi_emit_axchg_to(b, dst, bi_src_index(&instr->src[0]), &instr->src[1],
|
||||
BI_SEG_WLS);
|
||||
} else {
|
||||
addr = bi_seg_add_i64(b, addr, bi_zero(), false, BI_SEG_WLS);
|
||||
bi_emit_cached_split(b, addr, 64);
|
||||
assert(nir_src_bit_size(instr->src[1]) == 32);
|
||||
|
||||
bi_index addr = bi_src_index(&instr->src[0]);
|
||||
bi_index addr_hi;
|
||||
|
||||
if (b->shader->arch >= 9) {
|
||||
bi_handle_segment(b, &addr, &addr_hi, BI_SEG_WLS, NULL);
|
||||
addr = bi_collect_v2i32(b, addr, addr_hi);
|
||||
} else {
|
||||
addr = bi_seg_add_i64(b, addr, bi_zero(), false, BI_SEG_WLS);
|
||||
bi_emit_cached_split(b, addr, 64);
|
||||
}
|
||||
|
||||
bi_emit_atomic_i32_to(b, dst, addr, bi_src_index(&instr->src[1]), op);
|
||||
}
|
||||
|
||||
bi_emit_atomic_i32_to(b, dst, addr, bi_src_index(&instr->src[1]),
|
||||
instr->intrinsic);
|
||||
bi_split_dest(b, instr->dest);
|
||||
break;
|
||||
}
|
||||
|
||||
case nir_intrinsic_image_atomic_add:
|
||||
case nir_intrinsic_image_atomic_imin:
|
||||
case nir_intrinsic_image_atomic_umin:
|
||||
case nir_intrinsic_image_atomic_imax:
|
||||
case nir_intrinsic_image_atomic_umax:
|
||||
case nir_intrinsic_image_atomic_and:
|
||||
case nir_intrinsic_image_atomic_or:
|
||||
case nir_intrinsic_image_atomic_xor:
|
||||
assert(nir_src_bit_size(instr->src[3]) == 32);
|
||||
case nir_intrinsic_image_atomic: {
|
||||
nir_atomic_op op = nir_intrinsic_atomic_op(instr);
|
||||
|
||||
bi_emit_atomic_i32_to(b, dst, bi_emit_lea_image(b, instr),
|
||||
bi_src_index(&instr->src[3]), instr->intrinsic);
|
||||
bi_split_dest(b, instr->dest);
|
||||
break;
|
||||
if (op == nir_atomic_op_xchg) {
|
||||
bi_emit_axchg_to(b, dst, bi_emit_lea_image(b, instr), &instr->src[3],
|
||||
BI_SEG_NONE);
|
||||
} else {
|
||||
assert(nir_src_bit_size(instr->src[3]) == 32);
|
||||
|
||||
case nir_intrinsic_global_atomic_add:
|
||||
case nir_intrinsic_global_atomic_imin:
|
||||
case nir_intrinsic_global_atomic_umin:
|
||||
case nir_intrinsic_global_atomic_imax:
|
||||
case nir_intrinsic_global_atomic_umax:
|
||||
case nir_intrinsic_global_atomic_and:
|
||||
case nir_intrinsic_global_atomic_or:
|
||||
case nir_intrinsic_global_atomic_xor:
|
||||
assert(nir_src_bit_size(instr->src[1]) == 32);
|
||||
|
||||
bi_emit_atomic_i32_to(b, dst, bi_src_index(&instr->src[0]),
|
||||
bi_src_index(&instr->src[1]), instr->intrinsic);
|
||||
bi_emit_atomic_i32_to(b, dst, bi_emit_lea_image(b, instr),
|
||||
bi_src_index(&instr->src[3]), op);
|
||||
}
|
||||
|
||||
bi_split_dest(b, instr->dest);
|
||||
break;
|
||||
}
|
||||
|
||||
case nir_intrinsic_global_atomic: {
|
||||
nir_atomic_op op = nir_intrinsic_atomic_op(instr);
|
||||
|
||||
if (op == nir_atomic_op_xchg) {
|
||||
bi_emit_axchg_to(b, dst, bi_src_index(&instr->src[0]), &instr->src[1],
|
||||
BI_SEG_WLS);
|
||||
} else {
|
||||
assert(nir_src_bit_size(instr->src[1]) == 32);
|
||||
|
||||
bi_emit_atomic_i32_to(b, dst, bi_src_index(&instr->src[0]),
|
||||
bi_src_index(&instr->src[1]), op);
|
||||
}
|
||||
|
||||
bi_split_dest(b, instr->dest);
|
||||
break;
|
||||
}
|
||||
|
||||
case nir_intrinsic_image_load:
|
||||
bi_emit_image_load(b, instr);
|
||||
|
|
@ -1682,37 +1652,19 @@ bi_emit_intrinsic(bi_builder *b, nir_intrinsic_instr *instr)
|
|||
bi_emit_image_store(b, instr);
|
||||
break;
|
||||
|
||||
case nir_intrinsic_global_atomic_exchange:
|
||||
bi_emit_axchg_to(b, dst, bi_src_index(&instr->src[0]), &instr->src[1],
|
||||
BI_SEG_NONE);
|
||||
bi_split_dest(b, instr->dest);
|
||||
break;
|
||||
|
||||
case nir_intrinsic_image_atomic_exchange:
|
||||
bi_emit_axchg_to(b, dst, bi_emit_lea_image(b, instr), &instr->src[3],
|
||||
BI_SEG_NONE);
|
||||
bi_split_dest(b, instr->dest);
|
||||
break;
|
||||
|
||||
case nir_intrinsic_shared_atomic_exchange:
|
||||
bi_emit_axchg_to(b, dst, bi_src_index(&instr->src[0]), &instr->src[1],
|
||||
BI_SEG_WLS);
|
||||
bi_split_dest(b, instr->dest);
|
||||
break;
|
||||
|
||||
case nir_intrinsic_global_atomic_comp_swap:
|
||||
case nir_intrinsic_global_atomic_swap:
|
||||
bi_emit_acmpxchg_to(b, dst, bi_src_index(&instr->src[0]), &instr->src[1],
|
||||
&instr->src[2], BI_SEG_NONE);
|
||||
bi_split_dest(b, instr->dest);
|
||||
break;
|
||||
|
||||
case nir_intrinsic_image_atomic_comp_swap:
|
||||
case nir_intrinsic_image_atomic_swap:
|
||||
bi_emit_acmpxchg_to(b, dst, bi_emit_lea_image(b, instr), &instr->src[3],
|
||||
&instr->src[4], BI_SEG_NONE);
|
||||
bi_split_dest(b, instr->dest);
|
||||
break;
|
||||
|
||||
case nir_intrinsic_shared_atomic_comp_swap:
|
||||
case nir_intrinsic_shared_atomic_swap:
|
||||
bi_emit_acmpxchg_to(b, dst, bi_src_index(&instr->src[0]), &instr->src[1],
|
||||
&instr->src[2], BI_SEG_WLS);
|
||||
bi_split_dest(b, instr->dest);
|
||||
|
|
@ -4484,6 +4436,7 @@ bi_optimize_nir(nir_shader *nir, unsigned gpu_id, bool is_blend)
|
|||
NIR_PASS(progress, nir, nir_opt_cse);
|
||||
}
|
||||
|
||||
NIR_PASS(progress, nir, nir_lower_legacy_atomics);
|
||||
NIR_PASS(progress, nir, nir_lower_load_const_to_scalar);
|
||||
NIR_PASS(progress, nir, nir_opt_dce);
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue