mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-25 02:10:11 +01:00
pco: fully support Vulkan 1.2 image atomics
Signed-off-by: Simon Perretta <simon.perretta@imgtec.com> Acked-by: Erik Faye-Lund <erik.faye-lund@collabora.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/37512>
This commit is contained in:
parent
08e3740e07
commit
6dc5e1e109
7 changed files with 222 additions and 19 deletions
|
|
@ -918,6 +918,7 @@ intrinsic("global_atomic_swap", src_comp=[1, 1, 1], dest_comp=1, indices=[ATOMI
|
|||
intrinsic("global_atomic_swap_2x32", src_comp=[2, 1, 1], dest_comp=1, indices=[ATOMIC_OP])
|
||||
intrinsic("global_atomic_swap_amd", src_comp=[1, 1, 1, 1], dest_comp=1, indices=[BASE, ATOMIC_OP])
|
||||
intrinsic("global_atomic_swap_agx", src_comp=[1, 1, 1, 1], dest_comp=1, indices=[ATOMIC_OP, SIGN_EXTEND])
|
||||
intrinsic("global_atomic_swap_pco", src_comp=[4], dest_comp=1, indices=[ATOMIC_OP], bit_sizes=[32])
|
||||
|
||||
def system_value(name, dest_comp, indices=[], bit_sizes=[32], can_reorder=True):
|
||||
flags = [CAN_ELIMINATE, CAN_REORDER] if can_reorder else [CAN_ELIMINATE]
|
||||
|
|
|
|||
|
|
@ -837,7 +837,6 @@ void pco_lower_nir(pco_ctx *ctx, nir_shader *nir, pco_data *data)
|
|||
|
||||
NIR_PASS(_, nir, pco_nir_lower_vk, data);
|
||||
NIR_PASS(_, nir, pco_nir_lower_io);
|
||||
NIR_PASS(_, nir, pco_nir_lower_atomics, data);
|
||||
|
||||
NIR_PASS(_, nir, nir_opt_constant_folding);
|
||||
|
||||
|
|
@ -873,6 +872,7 @@ void pco_lower_nir(pco_ctx *ctx, nir_shader *nir, pco_data *data)
|
|||
NIR_PASS(_, nir, pco_nir_lower_clip_cull_vars);
|
||||
|
||||
NIR_PASS(_, nir, pco_nir_lower_images, data);
|
||||
NIR_PASS(_, nir, pco_nir_lower_atomics, data);
|
||||
NIR_PASS(_,
|
||||
nir,
|
||||
nir_lower_tex,
|
||||
|
|
|
|||
|
|
@ -101,19 +101,42 @@ static nir_def *lower_atomic(nir_builder *b, nir_instr *instr, void *cb_data)
|
|||
|
||||
b->cursor = nir_before_instr(instr);
|
||||
|
||||
nir_def *buffer = intr->src[0].ssa;
|
||||
nir_def *offset = intr->src[1].ssa;
|
||||
nir_def *value = intr->src[2].ssa;
|
||||
nir_def *value_swap = intr->src[3].ssa;
|
||||
if (intr->intrinsic == nir_intrinsic_ssbo_atomic_swap) {
|
||||
nir_def *buffer = intr->src[0].ssa;
|
||||
nir_def *offset = intr->src[1].ssa;
|
||||
nir_def *value = intr->src[2].ssa;
|
||||
nir_def *value_swap = intr->src[3].ssa;
|
||||
|
||||
ASSERTED enum gl_access_qualifier access = nir_intrinsic_access(intr);
|
||||
ASSERTED unsigned num_components = intr->def.num_components;
|
||||
ASSERTED unsigned bit_size = intr->def.bit_size;
|
||||
assert(access == ACCESS_COHERENT);
|
||||
assert(num_components == 1 && bit_size == 32);
|
||||
|
||||
*uses_usclib = true;
|
||||
return usclib_emu_ssbo_atomic_comp_swap(b,
|
||||
buffer,
|
||||
offset,
|
||||
value,
|
||||
value_swap);
|
||||
}
|
||||
|
||||
nir_def *addr_data = intr->src[0].ssa;
|
||||
nir_def *addr_lo = nir_channel(b, addr_data, 0);
|
||||
nir_def *addr_hi = nir_channel(b, addr_data, 1);
|
||||
nir_def *value = nir_channel(b, addr_data, 2);
|
||||
nir_def *value_swap = nir_channel(b, addr_data, 3);
|
||||
|
||||
ASSERTED enum gl_access_qualifier access = nir_intrinsic_access(intr);
|
||||
ASSERTED unsigned num_components = intr->def.num_components;
|
||||
ASSERTED unsigned bit_size = intr->def.bit_size;
|
||||
assert(access == ACCESS_COHERENT);
|
||||
assert(num_components == 1 && bit_size == 32);
|
||||
|
||||
*uses_usclib = true;
|
||||
return usclib_emu_ssbo_atomic_comp_swap(b, buffer, offset, value, value_swap);
|
||||
return usclib_emu_global_atomic_comp_swap(b,
|
||||
addr_lo,
|
||||
addr_hi,
|
||||
value,
|
||||
value_swap);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -129,8 +152,10 @@ static bool is_lowerable_atomic(const nir_instr *instr,
|
|||
if (instr->type != nir_instr_type_intrinsic)
|
||||
return false;
|
||||
|
||||
return nir_instr_as_intrinsic(instr)->intrinsic ==
|
||||
nir_intrinsic_ssbo_atomic_swap;
|
||||
nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
|
||||
|
||||
return intr->intrinsic == nir_intrinsic_ssbo_atomic_swap ||
|
||||
intr->intrinsic == nir_intrinsic_global_atomic_swap_pco;
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
|||
|
|
@ -839,6 +839,7 @@ static nir_def *lower_image(nir_builder *b, nir_instr *instr, void *cb_data)
|
|||
|
||||
case nir_intrinsic_image_deref_atomic:
|
||||
case nir_intrinsic_image_deref_atomic_swap:
|
||||
lod = nir_imm_int(b, 0);
|
||||
break;
|
||||
|
||||
default:
|
||||
|
|
@ -1158,9 +1159,6 @@ static nir_def *lower_image(nir_builder *b, nir_instr *instr, void *cb_data)
|
|||
|
||||
if (intr->intrinsic == nir_intrinsic_image_deref_atomic ||
|
||||
intr->intrinsic == nir_intrinsic_image_deref_atomic_swap) {
|
||||
assert(image_dim == GLSL_SAMPLER_DIM_2D);
|
||||
assert(!is_array);
|
||||
|
||||
assert(util_format_is_plain(format));
|
||||
assert(util_format_is_pure_integer(format));
|
||||
|
||||
|
|
@ -1170,12 +1168,90 @@ static nir_def *lower_image(nir_builder *b, nir_instr *instr, void *cb_data)
|
|||
assert(util_format_get_blockdepth(format) == 1);
|
||||
assert(util_format_get_blocksize(format) == sizeof(uint32_t));
|
||||
|
||||
/* Calculate untwiddled offset. */
|
||||
nir_def *x = nir_i2i16(b, nir_channel(b, coords, 0));
|
||||
nir_def *y = nir_i2i16(b, nir_channel(b, coords, 1));
|
||||
nir_def *twiddled_offset = nir_interleave(b, y, x);
|
||||
twiddled_offset =
|
||||
nir_imul_imm(b, twiddled_offset, util_format_get_blocksize(format));
|
||||
if (image_dim == GLSL_SAMPLER_DIM_CUBE) {
|
||||
image_dim = GLSL_SAMPLER_DIM_2D;
|
||||
is_array = true;
|
||||
} else if (image_dim == GLSL_SAMPLER_DIM_BUF) {
|
||||
image_dim = GLSL_SAMPLER_DIM_2D;
|
||||
coords = nir_vec2(b,
|
||||
nir_umod_imm(b, coords, 8192),
|
||||
nir_udiv_imm(b, coords, 8192));
|
||||
}
|
||||
|
||||
nir_def *twiddled_offset = NULL;
|
||||
nir_def *array_index = NULL;
|
||||
switch (image_dim) {
|
||||
case GLSL_SAMPLER_DIM_1D: {
|
||||
twiddled_offset = nir_channel(b, coords, 0);
|
||||
twiddled_offset =
|
||||
nir_imul_imm(b, twiddled_offset, util_format_get_blocksize(format));
|
||||
if (is_array)
|
||||
array_index = nir_channel(b, coords, 1);
|
||||
break;
|
||||
}
|
||||
|
||||
case GLSL_SAMPLER_DIM_2D: {
|
||||
/* Calculate untwiddled offset. */
|
||||
nir_def *x = nir_i2i16(b, nir_channel(b, coords, 0));
|
||||
nir_def *y = nir_i2i16(b, nir_channel(b, coords, 1));
|
||||
twiddled_offset = nir_interleave(b, y, x);
|
||||
twiddled_offset =
|
||||
nir_imul_imm(b, twiddled_offset, util_format_get_blocksize(format));
|
||||
|
||||
if (is_array)
|
||||
array_index = nir_channel(b, coords, 2);
|
||||
|
||||
break;
|
||||
}
|
||||
|
||||
case GLSL_SAMPLER_DIM_3D: {
|
||||
assert(!is_array);
|
||||
|
||||
/* Calculate untwiddled offset. */
|
||||
nir_def *num_comps = nir_imm_int(b, 3);
|
||||
nir_def *dim = nir_imm_int(b, image_dim);
|
||||
nir_def *_is_array = nir_imm_bool(b, is_array);
|
||||
nir_def *is_image = nir_imm_bool(b, true);
|
||||
nir_def *size_comps = usclib_tex_state_size(b,
|
||||
tex_state,
|
||||
num_comps,
|
||||
dim,
|
||||
_is_array,
|
||||
is_image,
|
||||
lod);
|
||||
|
||||
twiddled_offset = usclib_twiddle3d(b, coords, size_comps);
|
||||
data->common.uses.usclib = true;
|
||||
|
||||
twiddled_offset =
|
||||
nir_imul_imm(b, twiddled_offset, util_format_get_blocksize(format));
|
||||
|
||||
break;
|
||||
}
|
||||
|
||||
default:
|
||||
UNREACHABLE("");
|
||||
}
|
||||
|
||||
assert(twiddled_offset);
|
||||
|
||||
if (is_array) {
|
||||
assert(array_index);
|
||||
nir_def *array_max = usclib_tex_state_array_max(b, tex_state);
|
||||
array_index = nir_uclamp(b, array_index, nir_imm_int(b, 0), array_max);
|
||||
|
||||
nir_def *tex_meta = nir_load_tex_meta_pco(b,
|
||||
PCO_IMAGE_META_COUNT,
|
||||
elem,
|
||||
.desc_set = desc_set,
|
||||
.binding = binding);
|
||||
|
||||
nir_def *array_stride =
|
||||
nir_channel(b, tex_meta, PCO_IMAGE_META_LAYER_SIZE);
|
||||
|
||||
nir_def *array_offset = nir_imul(b, array_index, array_stride);
|
||||
twiddled_offset = nir_iadd(b, twiddled_offset, array_offset);
|
||||
}
|
||||
|
||||
/* Offset the address by the co-ordinates. */
|
||||
nir_def *base_addr = usclib_tex_state_address(b, tex_state);
|
||||
|
|
@ -1187,6 +1263,19 @@ static nir_def *lower_image(nir_builder *b, nir_instr *instr, void *cb_data)
|
|||
|
||||
nir_def *addr_lo = nir_channel(b, addr, 0);
|
||||
nir_def *addr_hi = nir_channel(b, addr, 1);
|
||||
|
||||
if (intr->intrinsic == nir_intrinsic_image_deref_atomic_swap) {
|
||||
nir_def *compare = intr->src[3].ssa;
|
||||
nir_def *dma_data = intr->src[4].ssa;
|
||||
|
||||
nir_def *addr_data = nir_vec4(b, addr_lo, addr_hi, compare, dma_data);
|
||||
|
||||
return nir_global_atomic_swap_pco(b,
|
||||
addr_data,
|
||||
.atomic_op =
|
||||
nir_intrinsic_atomic_op(intr));
|
||||
}
|
||||
|
||||
nir_def *dma_data = intr->src[3].ssa;
|
||||
|
||||
nir_def *addr_data = nir_vec3(b, addr_lo, addr_hi, dma_data);
|
||||
|
|
@ -1360,6 +1449,7 @@ static bool is_image(const nir_instr *instr, UNUSED const void *cb_data)
|
|||
case nir_intrinsic_image_deref_load:
|
||||
case nir_intrinsic_image_deref_store:
|
||||
case nir_intrinsic_image_deref_atomic:
|
||||
case nir_intrinsic_image_deref_atomic_swap:
|
||||
case nir_intrinsic_image_deref_size:
|
||||
return true;
|
||||
|
||||
|
|
|
|||
|
|
@ -112,4 +112,6 @@ uint nir_smp_pco(uint16 data,
|
|||
uint4 smp_state,
|
||||
uint smp_flags,
|
||||
uint range);
|
||||
|
||||
uint nir_umax(uint a, uint b);
|
||||
#endif /* PCO_LIBCL_H */
|
||||
|
|
|
|||
|
|
@ -33,6 +33,27 @@ usclib_emu_ssbo_atomic_comp_swap(uint2 ssbo_buffer, uint ssbo_offset, uint compa
|
|||
return result;
|
||||
}
|
||||
|
||||
uint32_t
|
||||
usclib_emu_global_atomic_comp_swap(uint32_t addr_lo, uint32_t addr_hi, uint compare, uint data)
|
||||
{
|
||||
uint32_t result;
|
||||
|
||||
nir_mutex_pco(PCO_MUTEX_ID_ATOMIC_EMU, PCO_MUTEX_OP_LOCK);
|
||||
for (uint u = 0; u < ROGUE_MAX_INSTANCES_PER_TASK; ++u) {
|
||||
if (u == nir_load_instance_num_pco()) {
|
||||
uint2 addr = (uint2)(addr_lo, addr_hi);
|
||||
uint32_t pre_val = nir_dma_ld_pco(1, addr);
|
||||
result = pre_val;
|
||||
|
||||
uint32_t post_val = (pre_val == compare) ? data : pre_val;
|
||||
nir_dma_st_pco(false, addr, post_val);
|
||||
}
|
||||
}
|
||||
nir_mutex_pco(PCO_MUTEX_ID_ATOMIC_EMU, PCO_MUTEX_OP_RELEASE);
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
void
|
||||
usclib_barrier(uint num_slots, uint counter_offset)
|
||||
{
|
||||
|
|
|
|||
|
|
@ -11,6 +11,7 @@
|
|||
|
||||
#include "csbgen/rogue_texstate.h"
|
||||
#include "libcl.h"
|
||||
#include "util/u_math.h"
|
||||
|
||||
|
||||
uint
|
||||
|
|
@ -123,3 +124,66 @@ usclib_tex_lod_dval_post_clamp_resource_to_view_space(uint4 tex_state, uint4 smp
|
|||
|
||||
return MAX2(lod_dval_post_clamp, 0.0f);
|
||||
}
|
||||
|
||||
/* TODO: this can probably be optimized with nir_interleave. */
|
||||
uint32_t
|
||||
usclib_twiddle3d(uint3 coords, uint3 size)
|
||||
{
|
||||
uint32_t width = nir_umax(size.x, 4);
|
||||
width = util_next_power_of_two(width);
|
||||
|
||||
uint32_t height = nir_umax(size.y, 4);
|
||||
height = util_next_power_of_two(height);
|
||||
|
||||
uint32_t depth = nir_umax(size.z, 4);
|
||||
depth = util_next_power_of_two(depth);
|
||||
|
||||
/* Get to the inner 4x4 cube. */
|
||||
width /= 4;
|
||||
height /= 4;
|
||||
depth /= 4;
|
||||
|
||||
uint32_t cx = coords.x / 4;
|
||||
uint32_t cy = coords.y / 4;
|
||||
uint32_t cz = coords.z / 4;
|
||||
uint32_t shift = 0;
|
||||
uint32_t cubeoffset = 0;
|
||||
uint32_t i = 0;
|
||||
|
||||
while (width > 1 || height > 1 || depth > 1) {
|
||||
uint32_t b1, b2, b3;
|
||||
|
||||
if (height > 1) {
|
||||
b2 = ((cy & (1 << i)) >> i);
|
||||
cubeoffset |= (b2 << shift);
|
||||
shift++;
|
||||
height >>= 1;
|
||||
}
|
||||
|
||||
if (width > 1) {
|
||||
b1 = ((cx & (1 << i)) >> i);
|
||||
cubeoffset |= (b1 << shift);
|
||||
shift++;
|
||||
width >>= 1;
|
||||
}
|
||||
|
||||
if (depth > 1) {
|
||||
b3 = ((cz & (1 << i)) >> i);
|
||||
cubeoffset |= (b3 << shift);
|
||||
shift++;
|
||||
depth >>= 1;
|
||||
}
|
||||
|
||||
++i;
|
||||
}
|
||||
|
||||
cubeoffset *= 4 * 4 * 4;
|
||||
|
||||
/* Get to slice. */
|
||||
cubeoffset += 4 * 4 * (coords.z % 4);
|
||||
|
||||
/* Twiddle within slice. */
|
||||
uint32_t r = (coords.y & 1) | ((coords.x & 1) << 1) | (((coords.y & 2) >> 1) << 2) | (((coords.x & 2) >> 1) << 3);
|
||||
|
||||
return cubeoffset + r;
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue