diff --git a/src/panfrost/compiler/bi_lower_divergent_indirects.c b/src/panfrost/compiler/bi_lower_divergent_indirects.c index e8453baaa79..490e57bc1bf 100644 --- a/src/panfrost/compiler/bi_lower_divergent_indirects.c +++ b/src/panfrost/compiler/bi_lower_divergent_indirects.c @@ -63,14 +63,8 @@ bi_lower_divergent_indirects_impl(nir_builder *b, nir_instr *instr, void *data) offset = nir_get_io_offset_src(intr); break; - case nir_intrinsic_image_atomic_add: - case nir_intrinsic_image_atomic_imin: - case nir_intrinsic_image_atomic_umin: - case nir_intrinsic_image_atomic_imax: - case nir_intrinsic_image_atomic_umax: - case nir_intrinsic_image_atomic_and: - case nir_intrinsic_image_atomic_or: - case nir_intrinsic_image_atomic_xor: + case nir_intrinsic_image_atomic: + case nir_intrinsic_image_atomic_swap: case nir_intrinsic_image_load: case nir_intrinsic_image_store: /* Any image access */ diff --git a/src/panfrost/compiler/bifrost_compile.c b/src/panfrost/compiler/bifrost_compile.c index c679c71b7ea..44d8f87e44c 100644 --- a/src/panfrost/compiler/bifrost_compile.c +++ b/src/panfrost/compiler/bifrost_compile.c @@ -1219,55 +1219,22 @@ bi_emit_acmpxchg_to(bi_builder *b, bi_index dst, bi_index addr, nir_src *arg_1, bi_make_vec_to(b, dst, inout_words, NULL, sz / 32, 32); } -/* Extracts an atomic opcode */ - static enum bi_atom_opc -bi_atom_opc_for_nir(nir_intrinsic_op op) +bi_atom_opc_for_nir(nir_atomic_op op) { + /* clang-format off */ switch (op) { - case nir_intrinsic_global_atomic_add: - case nir_intrinsic_shared_atomic_add: - case nir_intrinsic_image_atomic_add: - return BI_ATOM_OPC_AADD; - - case nir_intrinsic_global_atomic_imin: - case nir_intrinsic_shared_atomic_imin: - case nir_intrinsic_image_atomic_imin: - return BI_ATOM_OPC_ASMIN; - - case nir_intrinsic_global_atomic_umin: - case nir_intrinsic_shared_atomic_umin: - case nir_intrinsic_image_atomic_umin: - return BI_ATOM_OPC_AUMIN; - - case nir_intrinsic_global_atomic_imax: - case nir_intrinsic_shared_atomic_imax: 
- case nir_intrinsic_image_atomic_imax: - return BI_ATOM_OPC_ASMAX; - - case nir_intrinsic_global_atomic_umax: - case nir_intrinsic_shared_atomic_umax: - case nir_intrinsic_image_atomic_umax: - return BI_ATOM_OPC_AUMAX; - - case nir_intrinsic_global_atomic_and: - case nir_intrinsic_shared_atomic_and: - case nir_intrinsic_image_atomic_and: - return BI_ATOM_OPC_AAND; - - case nir_intrinsic_global_atomic_or: - case nir_intrinsic_shared_atomic_or: - case nir_intrinsic_image_atomic_or: - return BI_ATOM_OPC_AOR; - - case nir_intrinsic_global_atomic_xor: - case nir_intrinsic_shared_atomic_xor: - case nir_intrinsic_image_atomic_xor: - return BI_ATOM_OPC_AXOR; - - default: - unreachable("Unexpected computational atomic"); + case nir_atomic_op_iadd: return BI_ATOM_OPC_AADD; + case nir_atomic_op_imin: return BI_ATOM_OPC_ASMIN; + case nir_atomic_op_umin: return BI_ATOM_OPC_AUMIN; + case nir_atomic_op_imax: return BI_ATOM_OPC_ASMAX; + case nir_atomic_op_umax: return BI_ATOM_OPC_AUMAX; + case nir_atomic_op_iand: return BI_ATOM_OPC_AAND; + case nir_atomic_op_ior: return BI_ATOM_OPC_AOR; + case nir_atomic_op_ixor: return BI_ATOM_OPC_AXOR; + default: unreachable("Unexpected computational atomic"); } + /* clang-format on */ } /* Optimized unary atomics are available with an implied #1 argument */ @@ -1455,9 +1422,9 @@ bi_emit_image_store(bi_builder *b, nir_intrinsic_instr *instr) static void bi_emit_atomic_i32_to(bi_builder *b, bi_index dst, bi_index addr, bi_index arg, - nir_intrinsic_op intrinsic) + nir_atomic_op op) { - enum bi_atom_opc opc = bi_atom_opc_for_nir(intrinsic); + enum bi_atom_opc opc = bi_atom_opc_for_nir(op); enum bi_atom_opc post_opc = opc; bool bifrost = b->shader->arch <= 8; @@ -1616,63 +1583,66 @@ bi_emit_intrinsic(bi_builder *b, nir_intrinsic_instr *instr) */ break; - case nir_intrinsic_shared_atomic_add: - case nir_intrinsic_shared_atomic_imin: - case nir_intrinsic_shared_atomic_umin: - case nir_intrinsic_shared_atomic_imax: - case 
nir_intrinsic_shared_atomic_umax: - case nir_intrinsic_shared_atomic_and: - case nir_intrinsic_shared_atomic_or: - case nir_intrinsic_shared_atomic_xor: { - assert(nir_src_bit_size(instr->src[1]) == 32); + case nir_intrinsic_shared_atomic: { + nir_atomic_op op = nir_intrinsic_atomic_op(instr); - bi_index addr = bi_src_index(&instr->src[0]); - bi_index addr_hi; - - if (b->shader->arch >= 9) { - bi_handle_segment(b, &addr, &addr_hi, BI_SEG_WLS, NULL); - addr = bi_collect_v2i32(b, addr, addr_hi); + if (op == nir_atomic_op_xchg) { + bi_emit_axchg_to(b, dst, bi_src_index(&instr->src[0]), &instr->src[1], + BI_SEG_WLS); } else { - addr = bi_seg_add_i64(b, addr, bi_zero(), false, BI_SEG_WLS); - bi_emit_cached_split(b, addr, 64); + assert(nir_src_bit_size(instr->src[1]) == 32); + + bi_index addr = bi_src_index(&instr->src[0]); + bi_index addr_hi; + + if (b->shader->arch >= 9) { + bi_handle_segment(b, &addr, &addr_hi, BI_SEG_WLS, NULL); + addr = bi_collect_v2i32(b, addr, addr_hi); + } else { + addr = bi_seg_add_i64(b, addr, bi_zero(), false, BI_SEG_WLS); + bi_emit_cached_split(b, addr, 64); + } + + bi_emit_atomic_i32_to(b, dst, addr, bi_src_index(&instr->src[1]), op); } - bi_emit_atomic_i32_to(b, dst, addr, bi_src_index(&instr->src[1]), - instr->intrinsic); bi_split_dest(b, instr->dest); break; } - case nir_intrinsic_image_atomic_add: - case nir_intrinsic_image_atomic_imin: - case nir_intrinsic_image_atomic_umin: - case nir_intrinsic_image_atomic_imax: - case nir_intrinsic_image_atomic_umax: - case nir_intrinsic_image_atomic_and: - case nir_intrinsic_image_atomic_or: - case nir_intrinsic_image_atomic_xor: - assert(nir_src_bit_size(instr->src[3]) == 32); + case nir_intrinsic_image_atomic: { + nir_atomic_op op = nir_intrinsic_atomic_op(instr); - bi_emit_atomic_i32_to(b, dst, bi_emit_lea_image(b, instr), - bi_src_index(&instr->src[3]), instr->intrinsic); - bi_split_dest(b, instr->dest); - break; + if (op == nir_atomic_op_xchg) { + bi_emit_axchg_to(b, dst, bi_emit_lea_image(b, 
instr), &instr->src[3], + BI_SEG_NONE); + } else { + assert(nir_src_bit_size(instr->src[3]) == 32); - case nir_intrinsic_global_atomic_add: - case nir_intrinsic_global_atomic_imin: - case nir_intrinsic_global_atomic_umin: - case nir_intrinsic_global_atomic_imax: - case nir_intrinsic_global_atomic_umax: - case nir_intrinsic_global_atomic_and: - case nir_intrinsic_global_atomic_or: - case nir_intrinsic_global_atomic_xor: - assert(nir_src_bit_size(instr->src[1]) == 32); - - bi_emit_atomic_i32_to(b, dst, bi_src_index(&instr->src[0]), - bi_src_index(&instr->src[1]), instr->intrinsic); + bi_emit_atomic_i32_to(b, dst, bi_emit_lea_image(b, instr), + bi_src_index(&instr->src[3]), op); + } bi_split_dest(b, instr->dest); break; + } + + case nir_intrinsic_global_atomic: { + nir_atomic_op op = nir_intrinsic_atomic_op(instr); + + if (op == nir_atomic_op_xchg) { + bi_emit_axchg_to(b, dst, bi_src_index(&instr->src[0]), &instr->src[1], + BI_SEG_NONE); + } else { + assert(nir_src_bit_size(instr->src[1]) == 32); + + bi_emit_atomic_i32_to(b, dst, bi_src_index(&instr->src[0]), + bi_src_index(&instr->src[1]), op); + } + + bi_split_dest(b, instr->dest); + break; + } case nir_intrinsic_image_load: bi_emit_image_load(b, instr); @@ -1682,37 +1652,19 @@ bi_emit_intrinsic(bi_builder *b, nir_intrinsic_instr *instr) bi_emit_image_store(b, instr); break; - case nir_intrinsic_global_atomic_exchange: - bi_emit_axchg_to(b, dst, bi_src_index(&instr->src[0]), &instr->src[1], - BI_SEG_NONE); - bi_split_dest(b, instr->dest); - break; - - case nir_intrinsic_image_atomic_exchange: - bi_emit_axchg_to(b, dst, bi_emit_lea_image(b, instr), &instr->src[3], - BI_SEG_NONE); - bi_split_dest(b, instr->dest); - break; - - case nir_intrinsic_shared_atomic_exchange: - bi_emit_axchg_to(b, dst, bi_src_index(&instr->src[0]), &instr->src[1], - BI_SEG_WLS); - bi_split_dest(b, instr->dest); - break; - - case nir_intrinsic_global_atomic_comp_swap: + case nir_intrinsic_global_atomic_swap: bi_emit_acmpxchg_to(b, dst, 
bi_src_index(&instr->src[0]), &instr->src[1], &instr->src[2], BI_SEG_NONE); bi_split_dest(b, instr->dest); break; - case nir_intrinsic_image_atomic_comp_swap: + case nir_intrinsic_image_atomic_swap: bi_emit_acmpxchg_to(b, dst, bi_emit_lea_image(b, instr), &instr->src[3], &instr->src[4], BI_SEG_NONE); bi_split_dest(b, instr->dest); break; - case nir_intrinsic_shared_atomic_comp_swap: + case nir_intrinsic_shared_atomic_swap: bi_emit_acmpxchg_to(b, dst, bi_src_index(&instr->src[0]), &instr->src[1], &instr->src[2], BI_SEG_WLS); bi_split_dest(b, instr->dest); @@ -4484,6 +4436,7 @@ bi_optimize_nir(nir_shader *nir, unsigned gpu_id, bool is_blend) NIR_PASS(progress, nir, nir_opt_cse); } + NIR_PASS(progress, nir, nir_lower_legacy_atomics); NIR_PASS(progress, nir, nir_lower_load_const_to_scalar); NIR_PASS(progress, nir, nir_opt_dce);