diff --git a/src/imagination/pco/pco_nir_sync.c b/src/imagination/pco/pco_nir_sync.c index e2021bd0ae0..1aa3ea887b0 100644 --- a/src/imagination/pco/pco_nir_sync.c +++ b/src/imagination/pco/pco_nir_sync.c @@ -124,8 +124,7 @@ lower_usclib_atomic(nir_builder *b, nir_instr *instr, void *cb_data) } nir_def *addr_data = intr->src[0].ssa; - nir_def *addr_lo = nir_channel(b, addr_data, 0); - nir_def *addr_hi = nir_channel(b, addr_data, 1); + nir_def *addr = nir_channels(b, addr_data, BITFIELD_RANGE(0, 2)); nir_def *value = nir_channel(b, addr_data, 2); nir_def *value_swap = nir_channel(b, addr_data, 3); @@ -134,11 +133,7 @@ lower_usclib_atomic(nir_builder *b, nir_instr *instr, void *cb_data) assert(num_components == 1 && bit_size == 32); *uses_usclib = true; - return usclib_emu_global_atomic_comp_swap(b, - addr_lo, - addr_hi, - value, - value_swap); + return usclib_emu_global_atomic_comp_swap(b, addr, value, value_swap); } static bool lower_global_atomic_intrinsic(nir_builder *b, diff --git a/src/imagination/pco/usclib/sync.cl b/src/imagination/pco/usclib/sync.cl index 7678013e238..85140a57b84 100644 --- a/src/imagination/pco/usclib/sync.cl +++ b/src/imagination/pco/usclib/sync.cl @@ -13,43 +13,44 @@ #include "hwdef/rogue_hw_defs.h" #include "libcl.h" +/* + * Emulates atomic operations by serializing execution to each slot via a + * mutex, and to each instance via a per-instance loop. + */ +#define usclib_foreach_instance_atomic() \ + nir_mutex_pco(PCO_MUTEX_ID_ATOMIC_EMU, PCO_MUTEX_OP_LOCK); \ + for (bool __done = false; !__done; ({ nir_mutex_pco(PCO_MUTEX_ID_ATOMIC_EMU, PCO_MUTEX_OP_RELEASE); __done = true; })) \ + for (uint __u = 0; __u < ROGUE_MAX_INSTANCES_PER_TASK; ++__u) \ + if (__u == nir_load_instance_num_pco()) + uint32_t usclib_emu_ssbo_atomic_comp_swap(uint2 ssbo_buffer, uint ssbo_offset, uint compare, uint data) { uint32_t result; - nir_mutex_pco(PCO_MUTEX_ID_ATOMIC_EMU, PCO_MUTEX_OP_LOCK); - for (uint u = 0; u < ROGUE_MAX_INSTANCES_PER_TASK; ++u) { - if (u == nir_load_instance_num_pco()) { - uint32_t pre_val = nir_load_ssbo(ssbo_buffer, ssbo_offset, ACCESS_COHERENT, 4, 0, 0); - result = pre_val; + usclib_foreach_instance_atomic() { + uint32_t pre_val = nir_load_ssbo(ssbo_buffer, ssbo_offset, ACCESS_COHERENT, 4, 0, 0); + result = pre_val; - uint32_t post_val = (pre_val == compare) ? data : pre_val; - nir_store_ssbo(post_val, ssbo_buffer, ssbo_offset, 0x1, ACCESS_COHERENT, 4, 0, 0); - } + uint32_t post_val = (pre_val == compare) ? data : pre_val; + nir_store_ssbo(post_val, ssbo_buffer, ssbo_offset, 0x1, ACCESS_COHERENT, 4, 0, 0); } - nir_mutex_pco(PCO_MUTEX_ID_ATOMIC_EMU, PCO_MUTEX_OP_RELEASE); return result; } uint32_t -usclib_emu_global_atomic_comp_swap(uint32_t addr_lo, uint32_t addr_hi, uint compare, uint data) +usclib_emu_global_atomic_comp_swap(uint2 addr, uint compare, uint data) { uint32_t result; - nir_mutex_pco(PCO_MUTEX_ID_ATOMIC_EMU, PCO_MUTEX_OP_LOCK); - for (uint u = 0; u < ROGUE_MAX_INSTANCES_PER_TASK; ++u) { - if (u == nir_load_instance_num_pco()) { - uint2 addr = (uint2)(addr_lo, addr_hi); - uint32_t pre_val = nir_dma_ld_pco(1, addr); - result = pre_val; + usclib_foreach_instance_atomic() { + uint32_t pre_val = nir_dma_ld_pco(1, addr); + result = pre_val; - uint32_t post_val = (pre_val == compare) ? data : pre_val; - nir_dma_st_pco(false, addr, post_val); - } + uint32_t post_val = (pre_val == compare) ? data : pre_val; + nir_dma_st_pco(false, addr, post_val); } - nir_mutex_pco(PCO_MUTEX_ID_ATOMIC_EMU, PCO_MUTEX_OP_RELEASE); return result; }