diff --git a/src/imagination/pco/pco_data.h b/src/imagination/pco/pco_data.h index 5e1c1dc476d..4f04d0b8a3f 100644 --- a/src/imagination/pco/pco_data.h +++ b/src/imagination/pco/pco_data.h @@ -75,6 +75,8 @@ typedef struct _pco_fs_data { /** PCO compute shader-specific data. */ typedef struct _pco_cs_data { unsigned workgroup_size[3]; /** Workgroup size. */ + + pco_range shmem; } pco_cs_data; /** PCO image descriptor metadata. */ diff --git a/src/imagination/pco/pco_internal.h b/src/imagination/pco/pco_internal.h index 4ca5c588555..707da5e93c4 100644 --- a/src/imagination/pco/pco_internal.h +++ b/src/imagination/pco/pco_internal.h @@ -2257,9 +2257,30 @@ static inline void pco_ref_hwreg_addr_comps(unsigned index, addr_comps[1] = pco_ref_hwreg(index + 1, reg_class); } +static inline bool idx_reg_pointee_is_valid(pco_ref ref) +{ + assert(pco_ref_is_idx_reg(ref)); + + switch (pco_ref_get_reg_class(ref)) { + case PCO_REG_CLASS_TEMP: + case PCO_REG_CLASS_VTXIN: + case PCO_REG_CLASS_COEFF: + case PCO_REG_CLASS_SHARED: + case PCO_REG_CLASS_INDEX: + case PCO_REG_CLASS_PIXOUT: + return true; + + default: + break; + } + + return false; +} + /** * \brief Builds and returns an indexed vector hardware register reference. * + * \param[in] num Index register number. * \param[in] offset Pointee offset. * \param[in] reg_class Register class. * \param[in] chans Number of channels. @@ -2288,9 +2309,9 @@ static inline pco_ref pco_ref_hwreg_idx_vec(unsigned num, /** * \brief Builds and returns an indexed scalar hardware register reference. * + * \param[in] num Index register number. * \param[in] offset Pointee offset. * \param[in] reg_class Register class. - * \param[in] chans Number of channels. * \return Hardware register reference. */ static inline pco_ref @@ -2299,6 +2320,34 @@ pco_ref_hwreg_idx(unsigned num, unsigned offset, enum pco_reg_class reg_class) return pco_ref_hwreg_idx_vec(num, offset, reg_class, 1); } +/** + * \brief Builds and returns an indexed hardware register reference using an + * existing hardware register reference. + * + * \param[in] ref Base reference + * \param[in] reg_class Register class. + * \param[in] chans Number of channels. + * \return Hardware register reference. + */ +static inline pco_ref pco_ref_hwreg_idx_from(unsigned num, pco_ref ref) +{ + assert(pco_ref_is_reg(ref)); + + pco_ref idx_ref = { + .idx_reg = { + .num = num, + .offset = ref.val, + }, + .chans = ref.chans, + .bits = ref.bits, + .type = PCO_REF_TYPE_IDX_REG, + .reg_class = ref.reg_class, + }; + + assert(idx_reg_pointee_is_valid(idx_ref)); + return idx_ref; +} + /** * \brief Builds and returns an immediate reference. * diff --git a/src/imagination/pco/pco_legalize.c b/src/imagination/pco/pco_legalize.c index 69e46790663..0abde31a92c 100644 --- a/src/imagination/pco/pco_legalize.c +++ b/src/imagination/pco/pco_legalize.c @@ -94,6 +94,29 @@ static bool try_legalize_src_mappings(pco_instr *instr, return progress; } +static inline bool xfer_op_mods(pco_instr *dest, pco_instr *src) +{ + bool all_xfered = true; + + for (enum pco_op_mod mod = PCO_OP_MOD_NONE + 1; mod < _PCO_OP_MOD_COUNT; + ++mod) { + bool dest_has_mod = pco_instr_has_mod(dest, mod); + bool src_has_mod = pco_instr_has_mod(src, mod); + + if (!dest_has_mod && !src_has_mod) + continue; + + if (dest_has_mod != src_has_mod) { + all_xfered = false; + continue; + } + + pco_instr_set_mod(dest, mod, pco_instr_get_mod(src, mod)); + } + + return all_xfered; +} + static bool legalize_pseudo(pco_instr *instr) { switch (instr->op) { @@ -106,6 +129,33 @@ static bool legalize_pseudo(pco_instr *instr) return true; + case PCO_OP_MOV_OFFSET: { + pco_builder b = + pco_builder_create(instr->parent_func, pco_cursor_before_instr(instr)); + + pco_ref dest = instr->dest[0]; + pco_ref src = instr->src[0]; + pco_ref offset = instr->src[1]; + + unsigned idx_reg_num = 0; + pco_ref idx_reg = + pco_ref_hwreg_idx(idx_reg_num, idx_reg_num, PCO_REG_CLASS_INDEX); + + pco_mbyp(&b, idx_reg, offset, .exec_cnd = pco_instr_get_exec_cnd(instr)); + + if (pco_instr_get_offset_sd(instr) == PCO_OFFSET_SD_SRC) + src = pco_ref_hwreg_idx_from(idx_reg_num, src); + else + dest = pco_ref_hwreg_idx_from(idx_reg_num, dest); + + pco_instr *mbyp = pco_mbyp(&b, dest, src); + xfer_op_mods(mbyp, instr); + + pco_instr_delete(instr); + + return true; + } + default: break; } diff --git a/src/imagination/pco/pco_ops.py b/src/imagination/pco/pco_ops.py index 79b33086739..a9c7ab449f2 100644 --- a/src/imagination/pco/pco_ops.py +++ b/src/imagination/pco/pco_ops.py @@ -326,6 +326,11 @@ OM_MTX_OP = op_mod_enum('mtx_op', [ ('lock', 'lock'), ]) +OM_OFFSET_SD = op_mod_enum('offset_sd', [ + 'src', + 'dest', +]) + # Ops. OM_ALU = [OM_OLCHK, OM_EXEC_CND, OM_END, OM_ATOM, OM_RPT] @@ -448,6 +453,7 @@ O_FNEG = pseudo_op('fneg', OM_ALU, 1, 1) O_FABS = pseudo_op('fabs', OM_ALU, 1, 1) O_FFLR = pseudo_op('fflr', OM_ALU, 1, 1) O_MOV = pseudo_op('mov', OM_ALU, 1, 1) +O_MOV_OFFSET = pseudo_op('mov.offset', OM_ALU + [OM_OFFSET_SD], 1, 2) O_VEC = pseudo_op('vec', [OM_EXEC_CND], 1, VARIABLE, [], [[RM_ABS, RM_NEG]]) O_COMP = pseudo_op('comp', [], 1, 2) diff --git a/src/imagination/pco/pco_trans_nir.c b/src/imagination/pco/pco_trans_nir.c index 90d4537b839..2fe37ba8917 100644 --- a/src/imagination/pco/pco_trans_nir.c +++ b/src/imagination/pco/pco_trans_nir.c @@ -458,39 +458,73 @@ static unsigned fetch_resource_base_reg_packed(const pco_common_data *common, return fetch_resource_base_reg(common, desc_set, binding, elem, is_img_smp); } -static pco_instr *trans_load_push_constant(trans_ctx *tctx, - nir_intrinsic_instr *intr, - pco_ref dest, - pco_ref src) +static pco_instr *trans_load_common_store(trans_ctx *tctx, + nir_intrinsic_instr *intr, + pco_ref dest, + pco_ref offset_src, + bool coeffs, + pco_range *range) { - const pco_common_data *common = &tctx->shader->data.common; + nir_src *noffset_src = &intr->src[0]; + enum pco_reg_class reg_class = coeffs ? PCO_REG_CLASS_COEFF + : PCO_REG_CLASS_SHARED; unsigned chans = pco_ref_get_chans(dest); ASSERTED unsigned bits = pco_ref_get_bits(dest); assert(bits == 32); - assert(common->push_consts.range.count > 0); + assert(range->count > 0); - if (nir_src_is_const(intr->src[0])) { - unsigned offset = nir_src_as_uint(intr->src[0]); - assert(offset < common->push_consts.range.count); + if (pco_ref_is_null(offset_src) || nir_src_is_const(*noffset_src)) { + unsigned offset = + pco_ref_is_null(offset_src) ? 0 : nir_src_as_uint(*noffset_src); + assert(offset < range->count); - unsigned reg_index = common->push_consts.range.start + offset; - - src = pco_ref_hwreg_vec(reg_index, PCO_REG_CLASS_SHARED, chans); + pco_ref src = pco_ref_hwreg_vec(range->start + offset, reg_class, chans); return pco_mov(&tctx->b, dest, src, .rpt = chans); } - /* Use the dynamic offset to set up the index register. */ - pco_ref idx_reg = pco_ref_hwreg_idx(0, 0, PCO_REG_CLASS_INDEX); - pco_mov(&tctx->b, idx_reg, src); + pco_ref src_base = pco_ref_hwreg_vec(range->start, reg_class, chans); + return pco_mov_offset(&tctx->b, + dest, + src_base, + offset_src, + .offset_sd = PCO_OFFSET_SD_SRC, + .rpt = chans); +} - pco_ref idx_src = pco_ref_hwreg_idx_vec(0, - common->push_consts.range.start, - PCO_REG_CLASS_SHARED, - chans); +static pco_instr *trans_store_common_store(trans_ctx *tctx, + nir_intrinsic_instr *intr, + pco_ref data, + pco_ref offset_src, + bool coeffs, + pco_range *range) +{ + nir_src *noffset_src = &intr->src[1]; + enum pco_reg_class reg_class = coeffs ? PCO_REG_CLASS_COEFF + : PCO_REG_CLASS_SHARED; - return pco_mov(&tctx->b, dest, idx_src, .rpt = chans); + unsigned chans = pco_ref_get_chans(data); + ASSERTED unsigned bits = pco_ref_get_bits(data); + assert(bits == 32); + + assert(range->count > 0); + + if (nir_src_is_const(*noffset_src)) { + unsigned offset = nir_src_as_uint(*noffset_src); + assert(offset < range->count); + + pco_ref dest = pco_ref_hwreg_vec(range->start + offset, reg_class, chans); + return pco_mov(&tctx->b, dest, data, .rpt = chans); + } + + pco_ref dest_base = pco_ref_hwreg_vec(range->start, reg_class, chans); + return pco_mov_offset(&tctx->b, + dest_base, + data, + offset_src, + .offset_sd = PCO_OFFSET_SD_DEST, + .rpt = chans); } static pco_instr *trans_load_buffer(trans_ctx *tctx, @@ -1011,7 +1045,33 @@ static pco_instr *trans_intr(trans_ctx *tctx, nir_intrinsic_instr *intr) break; case nir_intrinsic_load_push_constant: - instr = trans_load_push_constant(tctx, intr, dest, src[0]); + instr = + trans_load_common_store(tctx, + intr, + dest, + src[0], + false, + &tctx->shader->data.common.push_consts.range); + break; + + case nir_intrinsic_load_shared: + assert(tctx->stage == MESA_SHADER_COMPUTE); + instr = trans_load_common_store(tctx, + intr, + dest, + src[0], + true, + &tctx->shader->data.cs.shmem); + break; + + case nir_intrinsic_store_shared: + assert(tctx->stage == MESA_SHADER_COMPUTE); + instr = trans_store_common_store(tctx, + intr, + src[0], + src[1], + true, + &tctx->shader->data.cs.shmem); break; case nir_intrinsic_load_ubo: diff --git a/src/imagination/vulkan/pvr_pipeline.c b/src/imagination/vulkan/pvr_pipeline.c index f531367934c..deeac2c5685 100644 --- a/src/imagination/vulkan/pvr_pipeline.c +++ b/src/imagination/vulkan/pvr_pipeline.c @@ -1940,6 +1940,15 @@ static void pvr_alloc_cs_sysvals(pco_data *data, nir_shader *nir) assert(BITSET_IS_EMPTY(system_values_read)); } +static void pvr_alloc_cs_shmem(pco_data *data, nir_shader *nir) +{ + assert(!nir->info.cs.has_variable_shared_mem); + assert(!nir->info.zero_initialize_shared_memory); + + data->cs.shmem.count = nir->info.shared_size >> 2; + data->common.coeffs += data->cs.shmem.count; +} + static void pvr_init_descriptors(pco_data *data, nir_shader *nir, struct vk_pipeline_layout *layout) @@ -2122,6 +2131,7 @@ static void pvr_postprocess_shader_data(pco_data *data, case MESA_SHADER_COMPUTE: { pvr_alloc_cs_sysvals(data, nir); + pvr_alloc_cs_shmem(data, nir); break; }