pco: Commonize atomic sync operations

Replace loop with macros
Rewrite channel op to multi channel select to avoid extra swizzle

Signed-off-by: Radu Costas <radu.costas@imgtec.com>
This commit is contained in:
Radu Costas 2026-03-04 10:20:10 +02:00
parent 8b706f4c0f
commit 0143b859b5
2 changed files with 23 additions and 27 deletions

View file

@@ -124,8 +124,7 @@ lower_usclib_atomic(nir_builder *b, nir_instr *instr, void *cb_data)
}
nir_def *addr_data = intr->src[0].ssa;
nir_def *addr_lo = nir_channel(b, addr_data, 0);
nir_def *addr_hi = nir_channel(b, addr_data, 1);
nir_def *addr = nir_channels(b, addr_data, BITFIELD_RANGE(0, 2));
nir_def *value = nir_channel(b, addr_data, 2);
nir_def *value_swap = nir_channel(b, addr_data, 3);
@@ -134,11 +133,7 @@ lower_usclib_atomic(nir_builder *b, nir_instr *instr, void *cb_data)
assert(num_components == 1 && bit_size == 32);
*uses_usclib = true;
return usclib_emu_global_atomic_comp_swap(b,
addr_lo,
addr_hi,
value,
value_swap);
return usclib_emu_global_atomic_comp_swap(b, addr, value, value_swap);
}
static bool lower_global_atomic_intrinsic(nir_builder *b,

View file

@@ -13,43 +13,44 @@
#include "hwdef/rogue_hw_defs.h"
#include "libcl.h"
/*
* Emulates atomic operations by serializing execution to each slot via a
* mutex, and to each instance via a per-instance loop.
*/
#define usclib_foreach_instance_atomic() \
nir_mutex_pco(PCO_MUTEX_ID_ATOMIC_EMU, PCO_MUTEX_OP_LOCK); \
for (bool __done = false; !__done; ({ nir_mutex_pco(PCO_MUTEX_ID_ATOMIC_EMU, PCO_MUTEX_OP_RELEASE); __done = true; })) \
for (uint __u = 0; __u < ROGUE_MAX_INSTANCES_PER_TASK; ++__u) \
if (__u == nir_load_instance_num_pco())
uint32_t
usclib_emu_ssbo_atomic_comp_swap(uint2 ssbo_buffer, uint ssbo_offset, uint compare, uint data)
{
uint32_t result;
nir_mutex_pco(PCO_MUTEX_ID_ATOMIC_EMU, PCO_MUTEX_OP_LOCK);
for (uint u = 0; u < ROGUE_MAX_INSTANCES_PER_TASK; ++u) {
if (u == nir_load_instance_num_pco()) {
uint32_t pre_val = nir_load_ssbo(ssbo_buffer, ssbo_offset, ACCESS_COHERENT, 4, 0, 0);
result = pre_val;
usclib_foreach_instance_atomic() {
uint32_t pre_val = nir_load_ssbo(ssbo_buffer, ssbo_offset, ACCESS_COHERENT, 4, 0, 0);
result = pre_val;
uint32_t post_val = (pre_val == compare) ? data : pre_val;
nir_store_ssbo(post_val, ssbo_buffer, ssbo_offset, 0x1, ACCESS_COHERENT, 4, 0, 0);
}
uint32_t post_val = (pre_val == compare) ? data : pre_val;
nir_store_ssbo(post_val, ssbo_buffer, ssbo_offset, 0x1, ACCESS_COHERENT, 4, 0, 0);
}
nir_mutex_pco(PCO_MUTEX_ID_ATOMIC_EMU, PCO_MUTEX_OP_RELEASE);
return result;
}
uint32_t
usclib_emu_global_atomic_comp_swap(uint32_t addr_lo, uint32_t addr_hi, uint compare, uint data)
usclib_emu_global_atomic_comp_swap(uint2 addr, uint compare, uint data)
{
uint32_t result;
nir_mutex_pco(PCO_MUTEX_ID_ATOMIC_EMU, PCO_MUTEX_OP_LOCK);
for (uint u = 0; u < ROGUE_MAX_INSTANCES_PER_TASK; ++u) {
if (u == nir_load_instance_num_pco()) {
uint2 addr = (uint2)(addr_lo, addr_hi);
uint32_t pre_val = nir_dma_ld_pco(1, addr);
result = pre_val;
usclib_foreach_instance_atomic() {
uint32_t pre_val = nir_dma_ld_pco(1, addr);
result = pre_val;
uint32_t post_val = (pre_val == compare) ? data : pre_val;
nir_dma_st_pco(false, addr, post_val);
}
uint32_t post_val = (pre_val == compare) ? data : pre_val;
nir_dma_st_pco(false, addr, post_val);
}
nir_mutex_pco(PCO_MUTEX_ID_ATOMIC_EMU, PCO_MUTEX_OP_RELEASE);
return result;
}