pvr, pco: improve indexed reg support, add shared memory support

Signed-off-by: Simon Perretta <simon.perretta@imgtec.com>
Acked-by: Erik Faye-Lund <erik.faye-lund@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/36412>
This commit is contained in:
Simon Perretta 2025-01-22 18:56:27 +00:00 committed by Marge Bot
parent 8b634881f8
commit 3322fafda2
6 changed files with 199 additions and 22 deletions

View file

@ -75,6 +75,8 @@ typedef struct _pco_fs_data {
/** PCO compute shader-specific data. */
typedef struct _pco_cs_data {
unsigned workgroup_size[3]; /** Workgroup size. */
pco_range shmem;
} pco_cs_data;
/** PCO image descriptor metadata. */

View file

@ -2257,9 +2257,30 @@ static inline void pco_ref_hwreg_addr_comps(unsigned index,
addr_comps[1] = pco_ref_hwreg(index + 1, reg_class);
}
static inline bool idx_reg_pointee_is_valid(pco_ref ref)
{
assert(pco_ref_is_idx_reg(ref));
switch (pco_ref_get_reg_class(ref)) {
case PCO_REG_CLASS_TEMP:
case PCO_REG_CLASS_VTXIN:
case PCO_REG_CLASS_COEFF:
case PCO_REG_CLASS_SHARED:
case PCO_REG_CLASS_INDEX:
case PCO_REG_CLASS_PIXOUT:
return true;
default:
break;
}
return false;
}
/**
* \brief Builds and returns an indexed vector hardware register reference.
*
* \param[in] num Index register number.
* \param[in] offset Pointee offset.
* \param[in] reg_class Register class.
* \param[in] chans Number of channels.
@ -2288,9 +2309,9 @@ static inline pco_ref pco_ref_hwreg_idx_vec(unsigned num,
/**
* \brief Builds and returns an indexed scalar hardware register reference.
*
* \param[in] num Index register number.
* \param[in] offset Pointee offset.
* \param[in] reg_class Register class.
* \param[in] chans Number of channels.
* \return Hardware register reference.
*/
static inline pco_ref
@ -2299,6 +2320,34 @@ pco_ref_hwreg_idx(unsigned num, unsigned offset, enum pco_reg_class reg_class)
return pco_ref_hwreg_idx_vec(num, offset, reg_class, 1);
}
/**
* \brief Builds and returns an indexed hardware register reference using an
* existing hardware register reference.
*
* \param[in] ref Base reference
* \param[in] reg_class Register class.
* \param[in] chans Number of channels.
* \return Hardware register reference.
*/
static inline pco_ref pco_ref_hwreg_idx_from(unsigned num, pco_ref ref)
{
assert(pco_ref_is_reg(ref));
pco_ref idx_ref = {
.idx_reg = {
.num = num,
.offset = ref.val,
},
.chans = ref.chans,
.bits = ref.bits,
.type = PCO_REF_TYPE_IDX_REG,
.reg_class = ref.reg_class,
};
assert(idx_reg_pointee_is_valid(idx_ref));
return idx_ref;
}
/**
* \brief Builds and returns an immediate reference.
*

View file

@ -94,6 +94,29 @@ static bool try_legalize_src_mappings(pco_instr *instr,
return progress;
}
static inline bool xfer_op_mods(pco_instr *dest, pco_instr *src)
{
bool all_xfered = true;
for (enum pco_op_mod mod = PCO_OP_MOD_NONE + 1; mod < _PCO_OP_MOD_COUNT;
++mod) {
bool dest_has_mod = pco_instr_has_mod(dest, mod);
bool src_has_mod = pco_instr_has_mod(src, mod);
if (!dest_has_mod && !src_has_mod)
continue;
if (dest_has_mod != src_has_mod) {
all_xfered = false;
continue;
}
pco_instr_set_mod(dest, mod, pco_instr_get_mod(src, mod));
}
return all_xfered;
}
static bool legalize_pseudo(pco_instr *instr)
{
switch (instr->op) {
@ -106,6 +129,33 @@ static bool legalize_pseudo(pco_instr *instr)
return true;
case PCO_OP_MOV_OFFSET: {
pco_builder b =
pco_builder_create(instr->parent_func, pco_cursor_before_instr(instr));
pco_ref dest = instr->dest[0];
pco_ref src = instr->src[0];
pco_ref offset = instr->src[1];
unsigned idx_reg_num = 0;
pco_ref idx_reg =
pco_ref_hwreg_idx(idx_reg_num, idx_reg_num, PCO_REG_CLASS_INDEX);
pco_mbyp(&b, idx_reg, offset, .exec_cnd = pco_instr_get_exec_cnd(instr));
if (pco_instr_get_offset_sd(instr) == PCO_OFFSET_SD_SRC)
src = pco_ref_hwreg_idx_from(idx_reg_num, src);
else
dest = pco_ref_hwreg_idx_from(idx_reg_num, dest);
pco_instr *mbyp = pco_mbyp(&b, dest, src);
xfer_op_mods(mbyp, instr);
pco_instr_delete(instr);
return true;
}
default:
break;
}

View file

@ -326,6 +326,11 @@ OM_MTX_OP = op_mod_enum('mtx_op', [
('lock', 'lock'),
])
OM_OFFSET_SD = op_mod_enum('offset_sd', [
'src',
'dest',
])
# Ops.
OM_ALU = [OM_OLCHK, OM_EXEC_CND, OM_END, OM_ATOM, OM_RPT]
@ -448,6 +453,7 @@ O_FNEG = pseudo_op('fneg', OM_ALU, 1, 1)
O_FABS = pseudo_op('fabs', OM_ALU, 1, 1)
O_FFLR = pseudo_op('fflr', OM_ALU, 1, 1)
O_MOV = pseudo_op('mov', OM_ALU, 1, 1)
O_MOV_OFFSET = pseudo_op('mov.offset', OM_ALU + [OM_OFFSET_SD], 1, 2)
O_VEC = pseudo_op('vec', [OM_EXEC_CND], 1, VARIABLE, [], [[RM_ABS, RM_NEG]])
O_COMP = pseudo_op('comp', [], 1, 2)

View file

@ -458,39 +458,73 @@ static unsigned fetch_resource_base_reg_packed(const pco_common_data *common,
return fetch_resource_base_reg(common, desc_set, binding, elem, is_img_smp);
}
static pco_instr *trans_load_push_constant(trans_ctx *tctx,
nir_intrinsic_instr *intr,
pco_ref dest,
pco_ref src)
static pco_instr *trans_load_common_store(trans_ctx *tctx,
nir_intrinsic_instr *intr,
pco_ref dest,
pco_ref offset_src,
bool coeffs,
pco_range *range)
{
const pco_common_data *common = &tctx->shader->data.common;
nir_src *noffset_src = &intr->src[0];
enum pco_reg_class reg_class = coeffs ? PCO_REG_CLASS_COEFF
: PCO_REG_CLASS_SHARED;
unsigned chans = pco_ref_get_chans(dest);
ASSERTED unsigned bits = pco_ref_get_bits(dest);
assert(bits == 32);
assert(common->push_consts.range.count > 0);
assert(range->count > 0);
if (nir_src_is_const(intr->src[0])) {
unsigned offset = nir_src_as_uint(intr->src[0]);
assert(offset < common->push_consts.range.count);
if (pco_ref_is_null(offset_src) || nir_src_is_const(*noffset_src)) {
unsigned offset =
pco_ref_is_null(offset_src) ? 0 : nir_src_as_uint(*noffset_src);
assert(offset < range->count);
unsigned reg_index = common->push_consts.range.start + offset;
src = pco_ref_hwreg_vec(reg_index, PCO_REG_CLASS_SHARED, chans);
pco_ref src = pco_ref_hwreg_vec(range->start + offset, reg_class, chans);
return pco_mov(&tctx->b, dest, src, .rpt = chans);
}
/* Use the dynamic offset to set up the index register. */
pco_ref idx_reg = pco_ref_hwreg_idx(0, 0, PCO_REG_CLASS_INDEX);
pco_mov(&tctx->b, idx_reg, src);
pco_ref src_base = pco_ref_hwreg_vec(range->start, reg_class, chans);
return pco_mov_offset(&tctx->b,
dest,
src_base,
offset_src,
.offset_sd = PCO_OFFSET_SD_SRC,
.rpt = chans);
}
pco_ref idx_src = pco_ref_hwreg_idx_vec(0,
common->push_consts.range.start,
PCO_REG_CLASS_SHARED,
chans);
static pco_instr *trans_store_common_store(trans_ctx *tctx,
nir_intrinsic_instr *intr,
pco_ref data,
pco_ref offset_src,
bool coeffs,
pco_range *range)
{
nir_src *noffset_src = &intr->src[1];
enum pco_reg_class reg_class = coeffs ? PCO_REG_CLASS_COEFF
: PCO_REG_CLASS_SHARED;
return pco_mov(&tctx->b, dest, idx_src, .rpt = chans);
unsigned chans = pco_ref_get_chans(data);
ASSERTED unsigned bits = pco_ref_get_bits(data);
assert(bits == 32);
assert(range->count > 0);
if (nir_src_is_const(*noffset_src)) {
unsigned offset = nir_src_as_uint(*noffset_src);
assert(offset < range->count);
pco_ref dest = pco_ref_hwreg_vec(range->start + offset, reg_class, chans);
return pco_mov(&tctx->b, dest, data, .rpt = chans);
}
pco_ref dest_base = pco_ref_hwreg_vec(range->start, reg_class, chans);
return pco_mov_offset(&tctx->b,
dest_base,
data,
offset_src,
.offset_sd = PCO_OFFSET_SD_DEST,
.rpt = chans);
}
static pco_instr *trans_load_buffer(trans_ctx *tctx,
@ -1011,7 +1045,33 @@ static pco_instr *trans_intr(trans_ctx *tctx, nir_intrinsic_instr *intr)
break;
case nir_intrinsic_load_push_constant:
instr = trans_load_push_constant(tctx, intr, dest, src[0]);
instr =
trans_load_common_store(tctx,
intr,
dest,
src[0],
false,
&tctx->shader->data.common.push_consts.range);
break;
case nir_intrinsic_load_shared:
assert(tctx->stage == MESA_SHADER_COMPUTE);
instr = trans_load_common_store(tctx,
intr,
dest,
src[0],
true,
&tctx->shader->data.cs.shmem);
break;
case nir_intrinsic_store_shared:
assert(tctx->stage == MESA_SHADER_COMPUTE);
instr = trans_store_common_store(tctx,
intr,
src[0],
src[1],
true,
&tctx->shader->data.cs.shmem);
break;
case nir_intrinsic_load_ubo:

View file

@ -1940,6 +1940,15 @@ static void pvr_alloc_cs_sysvals(pco_data *data, nir_shader *nir)
assert(BITSET_IS_EMPTY(system_values_read));
}
static void pvr_alloc_cs_shmem(pco_data *data, nir_shader *nir)
{
assert(!nir->info.cs.has_variable_shared_mem);
assert(!nir->info.zero_initialize_shared_memory);
data->cs.shmem.count = nir->info.shared_size >> 2;
data->common.coeffs += data->cs.shmem.count;
}
static void pvr_init_descriptors(pco_data *data,
nir_shader *nir,
struct vk_pipeline_layout *layout)
@ -2122,6 +2131,7 @@ static void pvr_postprocess_shader_data(pco_data *data,
case MESA_SHADER_COMPUTE: {
pvr_alloc_cs_sysvals(data, nir);
pvr_alloc_cs_shmem(data, nir);
break;
}