pco: image atomics support

Signed-off-by: Simon Perretta <simon.perretta@imgtec.com>
Acked-by: Erik Faye-Lund <erik.faye-lund@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/36412>
This commit is contained in:
Simon Perretta 2025-05-08 18:19:43 +01:00 committed by Marge Bot
parent 672541d036
commit c3325b22d8
7 changed files with 150 additions and 11 deletions

View file

@ -859,6 +859,7 @@ visit_intrinsic(nir_intrinsic_instr *instr, struct divergence_state *state)
case nir_intrinsic_global_atomic_swap_agx:
case nir_intrinsic_global_atomic_2x32:
case nir_intrinsic_global_atomic_swap_2x32:
case nir_intrinsic_global_atomic_pco:
case nir_intrinsic_atomic_counter_add:
case nir_intrinsic_atomic_counter_min:
case nir_intrinsic_atomic_counter_max:

View file

@ -895,6 +895,10 @@ intrinsic("load_vulkan_descriptor", src_comp=[-1], dest_comp=0,
#
# AGX global variants take a 64-bit base address plus a 32-bit offset in words.
# The offset is sign-extended or zero-extended based on the SIGN_EXTEND index.
#
# PCO global variants use a vec3 for the memory address and data, where component X
# has the low 32 address bits, component Y has the high 32 address bits, and component Z
# has the data parameter.
intrinsic("deref_atomic", src_comp=[-1, 1], dest_comp=1, indices=[ACCESS, ATOMIC_OP])
intrinsic("ssbo_atomic", src_comp=[-1, 1, 1], dest_comp=1, indices=[ACCESS, ATOMIC_OP, OFFSET_SHIFT])
@ -904,6 +908,7 @@ intrinsic("global_atomic", src_comp=[1, 1], dest_comp=1, indices=[ATOMIC_OP])
intrinsic("global_atomic_2x32", src_comp=[2, 1], dest_comp=1, indices=[ATOMIC_OP])
intrinsic("global_atomic_amd", src_comp=[1, 1, 1], dest_comp=1, indices=[BASE, ATOMIC_OP])
intrinsic("global_atomic_agx", src_comp=[1, 1, 1], dest_comp=1, indices=[ATOMIC_OP, SIGN_EXTEND])
intrinsic("global_atomic_pco", src_comp=[3], dest_comp=1, indices=[ATOMIC_OP], bit_sizes=[32])
intrinsic("deref_atomic_swap", src_comp=[-1, 1, 1], dest_comp=1, indices=[ACCESS, ATOMIC_OP])
intrinsic("ssbo_atomic_swap", src_comp=[-1, 1, 1, 1], dest_comp=1, indices=[ACCESS, ATOMIC_OP, OFFSET_SHIFT])

View file

@ -1595,6 +1595,18 @@ encode_map(O_REV,
op_ref_maps=[('0', ['ft2'], ['s2'])]
)
# Encoder mapping for the SHUFFLE ALU op (used to implement nir_op_interleave):
# a single phase-0 encoding where the shift1 unit is put into 'shfl' mode and
# the count/bitmask inputs are bypassed.
# NOTE(review): exact meaning of 'byp'/'shfl' field values comes from the PCO
# ISA encoding tables — confirm against the hardware reference.
encode_map(O_SHUFFLE,
encodings=[
(I_PHASE0_SRC, [
('count_src', 'ft2'),
('count_op', 'byp'),
('bitmask_src_op', 'byp'),
('shift1_op', 'shfl')
])
],
# Op 0 reads s2 and s1, writing its result through feed-through ft2.
op_ref_maps=[('0', ['ft2'], ['s2', 's1'])]
)
encode_map(O_LOGICAL,
encodings=[
(I_PHASE1, [
@ -3283,6 +3295,25 @@ group_map(O_REV,
dests=[('w[1]', ('0', DEST(0)), 'ft2')]
)
# Instruction-group mapping for SHUFFLE: packs the op into a bitwise-pipeline
# group header (single op in slot p0, w1 write port enabled), routing NIR
# sources 1/0 to hardware source slots s1/s2 and the destination to w[1] via
# ft2. Mirrors the neighbouring O_REV/O_LOGICAL group maps.
group_map(O_SHUFFLE,
hdr=(I_IGRP_HDR_BITWISE, [
('opcnt', 'p0'),
('olchk', OM_OLCHK),
('w1p', True),
('w0p', False),
('cc', OM_EXEC_CND),
('end', OM_END),
('atom', OM_ATOM),
('rpt', OM_RPT)
]),
enc_ops=[('0', O_SHUFFLE)],
srcs=[
('s[1]', ('0', SRC(1)), 's1'),
('s[2]', ('0', SRC(0)), 's2')
],
dests=[('w[1]', ('0', DEST(0)), 'ft2')]
)
group_map(O_LOGICAL,
hdr=(I_IGRP_HDR_BITWISE, [
('opcnt', ['p0', 'p1']),

View file

@ -797,7 +797,7 @@ static nir_def *lower_image(nir_builder *b, nir_instr *instr, void *cb_data)
bool is_cube_array = image_dim == GLSL_SAMPLER_DIM_CUBE && is_array;
nir_def *lod;
nir_def *lod = NULL;
switch (intr->intrinsic) {
case nir_intrinsic_image_deref_load:
lod = intr->src[3].ssa;
@ -811,6 +811,10 @@ static nir_def *lower_image(nir_builder *b, nir_instr *instr, void *cb_data)
lod = intr->src[1].ssa;
break;
case nir_intrinsic_image_deref_atomic:
case nir_intrinsic_image_deref_atomic_swap:
break;
default:
UNREACHABLE("");
}
@ -862,9 +866,11 @@ static nir_def *lower_image(nir_builder *b, nir_instr *instr, void *cb_data)
return nir_vec(b, size_comps, intr->def.num_components);
}
nir_alu_type type = intr->intrinsic == nir_intrinsic_image_deref_load
? nir_intrinsic_dest_type(intr)
: nir_intrinsic_src_type(intr);
nir_alu_type type = nir_type_invalid;
if (intr->intrinsic == nir_intrinsic_image_deref_load)
type = nir_intrinsic_dest_type(intr);
else if (intr->intrinsic == nir_intrinsic_image_deref_store)
type = nir_intrinsic_src_type(intr);
bool msaa = image_dim == GLSL_SAMPLER_DIM_MS ||
image_dim == GLSL_SAMPLER_DIM_SUBPASS_MS;
@ -1050,24 +1056,70 @@ static nir_def *lower_image(nir_builder *b, nir_instr *instr, void *cb_data)
}
}
unsigned smp_desc = ia ? PCO_IA_SAMPLER : PCO_POINT_SAMPLER;
nir_def *tex_state = nir_load_tex_state_pco(b,
ROGUE_NUM_TEXSTATE_DWORDS,
elem,
.desc_set = desc_set,
.binding = binding);
unsigned num_coord_comps = nir_image_intrinsic_coord_components(intr);
if (coords)
coords = nir_trim_vector(b, coords, num_coord_comps);
if (intr->intrinsic == nir_intrinsic_image_deref_atomic ||
intr->intrinsic == nir_intrinsic_image_deref_atomic_swap) {
assert(image_dim == GLSL_SAMPLER_DIM_2D);
assert(!is_array);
assert(util_format_is_plain(format));
assert(util_format_is_pure_integer(format));
assert(util_format_get_nr_components(format) == 1);
assert(util_format_get_blockwidth(format) == 1);
assert(util_format_get_blockheight(format) == 1);
assert(util_format_get_blockdepth(format) == 1);
assert(util_format_get_blocksize(format) == sizeof(uint32_t));
nir_def *tex_state_word[] = {
[0] = nir_channel(b, tex_state, 0),
[1] = nir_channel(b, tex_state, 1),
[2] = nir_channel(b, tex_state, 2),
[3] = nir_channel(b, tex_state, 3),
};
nir_def *base_addr_lo;
nir_def *base_addr_hi;
unpack_base_addr(b, tex_state_word, &base_addr_lo, &base_addr_hi);
/* Calculate untwiddled offset. */
nir_def *x = nir_i2i16(b, nir_channel(b, coords, 0));
nir_def *y = nir_i2i16(b, nir_channel(b, coords, 1));
nir_def *twiddled_offset = nir_interleave(b, y, x);
twiddled_offset =
nir_imul_imm(b, twiddled_offset, util_format_get_blocksize(format));
/* Offset the address by the co-ordinates. */
nir_def *addr =
nir_uadd64_32(b, base_addr_lo, base_addr_hi, twiddled_offset);
nir_def *addr_lo = nir_channel(b, addr, 0);
nir_def *addr_hi = nir_channel(b, addr, 1);
nir_def *data = intr->src[3].ssa;
nir_def *addr_data = nir_vec3(b, addr_lo, addr_hi, data);
return nir_global_atomic_pco(b,
addr_data,
.atomic_op = nir_intrinsic_atomic_op(intr));
}
unsigned smp_desc = ia ? PCO_IA_SAMPLER : PCO_POINT_SAMPLER;
nir_def *smp_state = nir_load_smp_state_pco(b,
ROGUE_NUM_TEXSTATE_DWORDS,
nir_imm_int(b, 0),
.desc_set = smp_desc,
.binding = smp_desc);
unsigned num_coord_comps = nir_image_intrinsic_coord_components(intr);
if (coords)
coords = nir_trim_vector(b, coords, num_coord_comps);
/* Special case, override buffers to be 2D. */
if (image_dim == GLSL_SAMPLER_DIM_BUF) {
image_dim = GLSL_SAMPLER_DIM_2D;
@ -1200,6 +1252,7 @@ static bool is_image(const nir_instr *instr, UNUSED const void *cb_data)
switch (intr->intrinsic) {
case nir_intrinsic_image_deref_load:
case nir_intrinsic_image_deref_store:
case nir_intrinsic_image_deref_atomic:
case nir_intrinsic_image_deref_size:
return true;

View file

@ -205,7 +205,9 @@ lower_image_derefs(nir_builder *b, nir_intrinsic_instr *intr, pco_data *data)
/* Sampler not needed for on-chip input attachments. */
data->common.uses.ia_sampler = true;
} else {
} else if (intr->intrinsic == nir_intrinsic_image_deref_load ||
intr->intrinsic == nir_intrinsic_image_deref_store) {
/* Sampler not needed for other types of image accesses. */
data->common.uses.point_sampler = true;
}
@ -240,6 +242,8 @@ static nir_def *lower_vk(nir_builder *b, nir_instr *instr, void *cb_data)
case nir_intrinsic_image_deref_load:
case nir_intrinsic_image_deref_store:
case nir_intrinsic_image_deref_atomic:
case nir_intrinsic_image_deref_atomic_swap:
case nir_intrinsic_image_deref_size:
return lower_image_derefs(b, intr, data);
@ -279,6 +283,8 @@ static bool is_vk(const nir_instr *instr, UNUSED const void *cb_data)
case nir_intrinsic_load_vulkan_descriptor:
case nir_intrinsic_image_deref_load:
case nir_intrinsic_image_deref_store:
case nir_intrinsic_image_deref_atomic:
case nir_intrinsic_image_deref_atomic_swap:
case nir_intrinsic_image_deref_size:
return true;

View file

@ -411,6 +411,7 @@ O_MOVI32 = hw_op('movi32', OM_ALU, 1, 1)
O_CBS = hw_op('cbs', OM_ALU, 1, 1)
O_FTB = hw_op('ftb', OM_ALU, 1, 1)
O_REV = hw_op('rev', OM_ALU, 1, 1)
O_SHUFFLE = hw_op('shuffle', OM_ALU, 1, 2)
O_LOGICAL = hw_op('logical', OM_ALU + [OM_LOGIOP], 1, 4)
O_SHIFT = hw_op('shift', OM_ALU + [OM_SHIFTOP], 1, 3)

View file

@ -1165,6 +1165,35 @@ static pco_instr *trans_atomic_buffer(trans_ctx *tctx,
UNREACHABLE("");
}
/**
 * Translate a nir_intrinsic_global_atomic_pco into a PCO atomic instruction.
 *
 * \param tctx      Translation context (provides the instruction builder).
 * \param intr      The NIR global-atomic intrinsic being translated.
 * \param dest      Destination register reference; must be a single 32-bit
 *                  channel (asserted below).
 * \param addr_data Packed vec3 source: 64-bit address in components x/y,
 *                  atomic data operand in component z.
 * \return The emitted atomic instruction.
 */
static pco_instr *trans_global_atomic_buffer(trans_ctx *tctx,
nir_intrinsic_instr *intr,
pco_ref dest,
pco_ref addr_data)
{
enum pco_atom_op atom_op = to_atom_op(nir_intrinsic_atomic_op(intr));
/* Should have been lowered. */
assert(atom_op != PCO_ATOM_OP_CMPXCHG);
ASSERTED unsigned chans = pco_ref_get_chans(dest);
unsigned bits = pco_ref_get_bits(dest);
/* Only scalar results are supported by this path. */
assert(chans == 1);
switch (bits) {
case 32:
/* 32-bit atomic: emit directly, reserving DRC 0 for the memory access. */
return pco_atomic(&tctx->b,
dest,
pco_ref_drc(PCO_DRC_0),
addr_data,
.atom_op = atom_op);
default:
break;
}
/* No other bit sizes are expected; the intrinsic is declared 32-bit only. */
UNREACHABLE("");
}
static inline enum pco_reg_class sys_val_to_reg_class(gl_system_value sys_val,
mesa_shader_stage stage)
{
@ -1593,6 +1622,10 @@ static pco_instr *trans_intr(trans_ctx *tctx, nir_intrinsic_instr *intr)
instr = trans_atomic_buffer(tctx, intr, dest, src[1], src[2]);
break;
case nir_intrinsic_global_atomic_pco:
instr = trans_global_atomic_buffer(tctx, intr, dest, src[0]);
break;
/* Vertex sysvals. */
case nir_intrinsic_load_vertex_id:
case nir_intrinsic_load_instance_id:
@ -2571,6 +2604,10 @@ static pco_instr *trans_alu(trans_ctx *tctx, nir_alu_instr *alu)
instr = pco_rev(&tctx->b, dest, src[0]);
break;
case nir_op_interleave:
instr = pco_shuffle(&tctx->b, dest, src[0], src[1]);
break;
case nir_op_f2i32:
instr = pco_pck(&tctx->b,
dest,
@ -2599,6 +2636,11 @@ static pco_instr *trans_alu(trans_ctx *tctx, nir_alu_instr *alu)
.pck_fmt = PCO_PCK_FMT_F16F16);
break;
/* Just consume/treat as 32-bit for now. */
case nir_op_i2i16:
instr = pco_mov(&tctx->b, pco_ref_bits(dest, 32), src[0]);
break;
case nir_op_f2i32_rtne:
instr = pco_pck(&tctx->b, dest, src[0], .pck_fmt = PCO_PCK_FMT_S32);
break;