agx: Fold addressing math into atomics

Like our loads and stores, our global atomics support indexing with a 64-bit
base plus a 32-bit element index, zero- or sign-extended and multiplied by the
word size. Unlike the loads and stores, they do not support additional shifting
(it's not too useful), so that needs an explicit lowering.

Switch to using AGX variants of the atomics, running our address pattern
matching on global atomics in order to delete some ALU.

This cleans up the image atomic lowering nicely, since we get to take full
advantage of the shift + zero-extend + add on the atomic. The shift comes from
multiplying by the bytes per pixel.

Signed-off-by: Alyssa Rosenzweig <alyssa@rosenzweig.io>
Acked-by: Christian Gmeiner <christian.gmeiner@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/23529>
This commit is contained in:
Alyssa Rosenzweig 2023-05-27 21:57:35 -04:00 committed by Marge Bot
parent 13535d3f9d
commit ba27071c8b
2 changed files with 19 additions and 5 deletions

View file

@@ -730,12 +730,12 @@ agx_emit_atomic(agx_builder *b, agx_index dst, nir_intrinsic_instr *instr,
translate_atomic_opcode(nir_intrinsic_atomic_op(instr));
agx_index base =
local ? agx_local_base(instr->src[0]) : agx_src_index(&instr->src[0]);
agx_index value = agx_src_index(&instr->src[1]);
agx_index index = agx_zero(); /* TODO: optimize address arithmetic? */
agx_index value = agx_src_index(&instr->src[local ? 1 : 2]);
agx_index index = local ? agx_zero() : agx_src_index(&instr->src[1]);
/* cmpxchg (only) takes 2 sources, passed in consecutive registers */
if (op == AGX_ATOMIC_OPC_CMPXCHG) {
agx_index value2 = agx_src_index(&instr->src[2]);
agx_index value2 = agx_src_index(&instr->src[local ? 2 : 3]);
value = agx_vec2(b, value2, value);
}
@@ -827,8 +827,8 @@ agx_emit_intrinsic(agx_builder *b, nir_intrinsic_instr *instr)
agx_emit_local_load(b, dst, instr);
return NULL;
case nir_intrinsic_global_atomic:
case nir_intrinsic_global_atomic_swap:
case nir_intrinsic_global_atomic_agx:
case nir_intrinsic_global_atomic_swap_agx:
agx_emit_atomic(b, dst, instr, false);
return NULL;

View file

@@ -246,6 +246,8 @@ pass(struct nir_builder *b, nir_instr *instr, UNUSED void *data)
nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
if (intr->intrinsic != nir_intrinsic_load_global &&
intr->intrinsic != nir_intrinsic_load_global_constant &&
intr->intrinsic != nir_intrinsic_global_atomic &&
intr->intrinsic != nir_intrinsic_global_atomic_swap &&
intr->intrinsic != nir_intrinsic_store_global)
return false;
@@ -309,6 +311,18 @@ pass(struct nir_builder *b, nir_instr *instr, UNUSED void *data)
offset, .access = nir_intrinsic_access(intr),
.base = match.shift, .format = format,
.sign_extend = match.sign_extend);
} else if (intr->intrinsic == nir_intrinsic_global_atomic) {
offset = nir_ishl_imm(b, offset, match.shift);
repl =
nir_global_atomic_agx(b, bit_size, new_base, offset, intr->src[1].ssa,
.atomic_op = nir_intrinsic_atomic_op(intr),
.sign_extend = match.sign_extend);
} else if (intr->intrinsic == nir_intrinsic_global_atomic_swap) {
offset = nir_ishl_imm(b, offset, match.shift);
repl = nir_global_atomic_swap_agx(
b, bit_size, new_base, offset, intr->src[1].ssa, intr->src[2].ssa,
.atomic_op = nir_intrinsic_atomic_op(intr),
.sign_extend = match.sign_extend);
} else {
nir_store_agx(b, intr->src[0].ssa, new_base, offset,
.access = nir_intrinsic_access(intr), .base = match.shift,