From 4927d2274c779e46975b9a717295a093f1f47ea7 Mon Sep 17 00:00:00 2001
From: Simon Perretta
Date: Tue, 1 Apr 2025 12:47:52 +0100
Subject: [PATCH] pco: handle vector ra via parallel copy

Signed-off-by: Simon Perretta
Acked-by: Erik Faye-Lund
Part-of:
---
 src/imagination/pco/pco_internal.h |  12 ++
 src/imagination/pco/pco_map.py     |  29 ++++
 src/imagination/pco/pco_ops.py     |   2 +
 src/imagination/pco/pco_ra.c       | 215 +++++++++++++++++++++++++++--
 4 files changed, 249 insertions(+), 9 deletions(-)

diff --git a/src/imagination/pco/pco_internal.h b/src/imagination/pco/pco_internal.h
index b646bbdfbaa..99dd83e38e7 100644
--- a/src/imagination/pco/pco_internal.h
+++ b/src/imagination/pco/pco_internal.h
@@ -2110,6 +2110,18 @@ static inline bool pco_ref_has_mods_set(pco_ref ref)
           (ref.elem != 0);
 }
 
+/**
+ * \brief Returns whether a reference is a temporary register.
+ *
+ * \param[in] ref PCO reference.
+ * \return True if the reference is a temporary register.
+ */
+static inline bool pco_ref_is_temp(pco_ref ref)
+{
+   return pco_ref_is_reg(ref) &&
+          pco_ref_get_reg_class(ref) == PCO_REG_CLASS_TEMP;
+}
+
 /* PCO ref builders. */
 /**
  * \brief Builds and returns a null reference.
diff --git a/src/imagination/pco/pco_map.py b/src/imagination/pco/pco_map.py
index 37b82632928..4b86bfc5fe6 100644
--- a/src/imagination/pco/pco_map.py
+++ b/src/imagination/pco/pco_map.py
@@ -2693,6 +2693,35 @@ group_map(O_FLUSH_P0,
    ]
 )
 
+group_map(O_MBYP2,
+   hdr=(I_IGRP_HDR_MAIN, [
+      ('oporg', 'p0_p1'),
+      ('olchk', OM_OLCHK),
+      ('w1p', True),
+      ('w0p', True),
+      ('cc', OM_EXEC_CND),
+      ('end', OM_END),
+      ('atom', OM_ATOM),
+      ('rpt', OM_RPT)
+   ]),
+   enc_ops=[
+      ('0', O_MBYP, [DEST(0)], [SRC(0)]),
+      ('1', O_MBYP, [DEST(1)], [SRC(1)])
+   ],
+   srcs=[
+      ('s[0]', ('0', SRC(0)), 's0'),
+      ('s[3]', ('1', SRC(0)), 's3')
+   ],
+   iss=[
+      ('is[4]', 'ft0'),
+      ('is[5]', 'ft1'),
+   ],
+   dests=[
+      ('w[0]', ('0', DEST(0)), 'ft0'),
+      ('w[1]', ('1', DEST(0)), 'ft1'),
+   ]
+)
+
 group_map(O_UVSW_WRITE,
    hdr=(I_IGRP_HDR_MAIN, [
       ('oporg', 'be'),
diff --git a/src/imagination/pco/pco_ops.py b/src/imagination/pco/pco_ops.py
index 3958d5155ab..bfc52bbdbf3 100644
--- a/src/imagination/pco/pco_ops.py
+++ b/src/imagination/pco/pco_ops.py
@@ -464,6 +464,8 @@ O_XCHG_ATOMIC = hw_op('xchg.atomic', OM_ALU_ATOMEXT, 2, 2, [], [[RM_ABS, RM_NEG]
 
 O_FLUSH_P0 = hw_op('flush.p0', [OM_EXEC_CND, OM_END])
 
+O_MBYP2 = hw_op('mbyp2', OM_ALU, 2, 2, [], [[RM_ABS, RM_NEG], [RM_ABS, RM_NEG]])
+
 # Pseudo-ops (unmapped).
 O_FNEG = pseudo_op('fneg', OM_ALU, 1, 1)
 O_FABS = pseudo_op('fabs', OM_ALU, 1, 1)
diff --git a/src/imagination/pco/pco_ra.c b/src/imagination/pco/pco_ra.c
index ed0814ff45e..e46f4e3adf1 100644
--- a/src/imagination/pco/pco_ra.c
+++ b/src/imagination/pco/pco_ra.c
@@ -1,6 +1,9 @@
 /*
  * Copyright © 2024 Imagination Technologies Ltd.
  *
+ * based in part on asahi driver which is:
+ * Copyright 2022 Alyssa Rosenzweig
+ *
  * SPDX-License-Identifier: MIT
  */
 
@@ -169,6 +172,172 @@ static void preproc_vecs(pco_func *func)
    ralloc_free(mem_ctx);
 }
 
+/**
+ * \brief A single register-to-register copy within a parallel copy.
+ */
+typedef struct _pco_copy {
+   pco_ref src; /**< Copy source. */
+   pco_ref dest; /**< Copy destination (a temp register). */
+   bool s1; /**< Whether to emit the copy using movs1 instead of mbyp. */
+
+   bool done; /**< Whether the copy has been resolved. */
+} pco_copy;
+
+/**
+ * \brief Returns whether a copy is blocked from being emitted.
+ *
+ * A copy is blocked while its destination temp is still waiting to be read
+ * as the source of a pending copy.
+ *
+ * \param[in] copy The copy.
+ * \param[in] temp_use_counts Pending source use counts, indexed by temp
+ *                            relative to lowest_temp.
+ * \param[in] lowest_temp The temp corresponding to index 0.
+ * \return True if the copy is blocked.
+ */
+static inline bool
+copy_blocked(pco_copy *copy, unsigned *temp_use_counts, unsigned lowest_temp)
+{
+   return temp_use_counts[pco_ref_get_temp(copy->dest) - lowest_temp] > 0;
+}
+
+/**
+ * \brief Emits the instruction for a single copy.
+ *
+ * \param[in,out] b PCO builder.
+ * \param[in] exec_cnd Execution condition for the emitted instruction.
+ * \param[in] copy The copy.
+ */
+static inline void
+do_copy(pco_builder *b, enum pco_exec_cnd exec_cnd, pco_copy *copy)
+{
+   if (copy->s1)
+      pco_movs1(b, copy->dest, copy->src, .exec_cnd = exec_cnd);
+   else
+      pco_mbyp(b, copy->dest, copy->src, .exec_cnd = exec_cnd);
+}
+
+/**
+ * \brief Emits an mbyp2 exchanging the source and destination of a copy.
+ *
+ * The copy's source (with its modifiers) is written to its destination,
+ * while the destination is written to the source register without
+ * modifiers.
+ *
+ * \param[in,out] b PCO builder.
+ * \param[in] exec_cnd Execution condition for the emitted instruction.
+ * \param[in] copy The copy; must not be a movs1 copy.
+ */
+static inline void
+do_swap(pco_builder *b, enum pco_exec_cnd exec_cnd, pco_copy *copy)
+{
+   assert(!copy->s1);
+
+   pco_mbyp2(b,
+             copy->dest,
+             pco_ref_reset_mods(copy->src),
+             copy->src,
+             copy->dest,
+             .exec_cnd = exec_cnd);
+}
+
+/**
+ * \brief Emits a set of copies as though they were performed in parallel.
+ *
+ * Copies whose destination is not pending as a source are emitted first;
+ * whatever remains is resolved by swapping.
+ *
+ * \param[in,out] b PCO builder.
+ * \param[in,out] copies Dynamic array of pco_copy to emit.
+ * \param[in] exec_cnd Execution condition for the emitted instructions.
+ * \param[in] highest_temp Highest temp referenced by the copies.
+ * \param[in] lowest_temp Lowest temp referenced by the copies.
+ */
+static void emit_copies(pco_builder *b,
+                        struct util_dynarray *copies,
+                        enum pco_exec_cnd exec_cnd,
+                        unsigned highest_temp,
+                        unsigned lowest_temp)
+{
+   /* With no copies the temp bounds are meaningless; bail before using
+    * them to size the bookkeeping.
+    */
+   if (!util_dynarray_num_elements(copies, pco_copy))
+      return;
+
+   assert(lowest_temp <= highest_temp);
+
+   unsigned temp_range = highest_temp - lowest_temp + 1;
+   unsigned *temp_use_counts =
+      rzalloc_array_size(NULL, sizeof(*temp_use_counts), temp_range);
+
+   /* Count the pending reads of each temp. */
+   util_dynarray_foreach (copies, pco_copy, copy) {
+      if (pco_ref_is_temp(copy->src))
+         ++temp_use_counts[pco_ref_get_temp(copy->src) - lowest_temp];
+   }
+
+   bool progress = true;
+   while (progress) {
+      progress = false;
+
+      /* Emit the copies whose destinations are no longer pending reads. */
+      util_dynarray_foreach (copies, pco_copy, copy) {
+         if (copy->done || copy_blocked(copy, temp_use_counts, lowest_temp))
+            continue;
+
+         copy->done = true;
+         progress = true;
+         do_copy(b, exec_cnd, copy);
+
+         if (pco_ref_is_temp(copy->src))
+            --temp_use_counts[pco_ref_get_temp(copy->src) - lowest_temp];
+      }
+
+      if (progress)
+         continue;
+
+      /* Every remaining copy is blocked; resolve them with swaps. */
+      util_dynarray_foreach (copies, pco_copy, copy) {
+         if (copy->done)
+            continue;
+
+         /* Trivial copies need no instruction. */
+         if (pco_refs_are_equal(copy->src, copy->dest, true)) {
+            copy->done = true;
+            continue;
+         }
+
+         do_swap(b, exec_cnd, copy);
+         copy->src = pco_ref_reset_mods(copy->src);
+
+         /* The swap moved the old contents of our destination into our
+          * source, so redirect any copies that were reading it. Only temp
+          * sources can refer to it; other register classes are skipped.
+          */
+         util_dynarray_foreach (copies, pco_copy, blocking) {
+            if (!pco_ref_is_temp(blocking->src) ||
+                pco_ref_get_temp(blocking->src) !=
+                   pco_ref_get_temp(copy->dest)) {
+               continue;
+            }
+
+            blocking->src = pco_ref_offset(blocking->src,
+                                           pco_ref_get_temp(copy->src) -
+                                              pco_ref_get_temp(copy->dest));
+         }
+
+         copy->done = true;
+      }
+   }
+
+   util_dynarray_foreach (copies, pco_copy, copy) {
+      assert(copy->done);
+   }
+
+   ralloc_free(temp_use_counts);
+}
+
 /**
  * \brief Performs register allocation on a function.
  *
@@ -532,6 +701,13 @@ static bool pco_ra_func(pco_func *func,
             override ? ra_get_node_reg(ra_graph, override->ref.val)
                      : ra_get_node_reg(ra_graph, instr->dest[0].val);
 
+         struct util_dynarray copies;
+         util_dynarray_init(&copies, NULL);
+
+         unsigned highest_temp = 0;
+         unsigned lowest_temp = ~0;
+
+         enum pco_exec_cnd exec_cnd = pco_instr_get_exec_cnd(instr);
          pco_foreach_instr_src (psrc, instr) {
             if (!pco_ref_is_ssa(*psrc) ||
                 !_mesa_hash_table_u64_search(overrides, psrc->val) ||
@@ -554,7 +730,6 @@ static bool pco_ra_func(pco_func *func,
                   ra_get_node_reg(ra_graph, psrc->val + num_ssas);
             }
 
-            enum pco_exec_cnd exec_cnd = pco_instr_get_exec_cnd(instr);
             for (unsigned u = 0; u < chans; ++u) {
                pco_ref dest =
                   pco_ref_hwreg(temp_dest_base + offset, PCO_REG_CLASS_TEMP);
@@ -570,13 +745,30 @@ static bool pco_ra_func(pco_func *func,
 
                pco_ref_xfer_mods(&src, psrc, false);
 
-               if (!pco_refs_are_equal(src, dest, true)) {
-                  if (pco_ref_is_reg(src) &&
-                      pco_ref_get_reg_class(src) == PCO_REG_CLASS_SPEC) {
-                     pco_movs1(&b, dest, src, .exec_cnd = exec_cnd);
-                  } else {
-                     pco_mbyp(&b, dest, src, .exec_cnd = exec_cnd);
-                  }
-               }
+               /* Track the range of temps touched by the copies; it is
+                * used to size the bookkeeping in emit_copies().
+                */
+               if (pco_ref_is_temp(src)) {
+                  highest_temp = MAX2(highest_temp, pco_ref_get_temp(src));
+                  lowest_temp = MIN2(lowest_temp, pco_ref_get_temp(src));
+               }
+
+               if (pco_ref_is_temp(dest)) {
+                  highest_temp = MAX2(highest_temp, pco_ref_get_temp(dest));
+                  lowest_temp = MIN2(lowest_temp, pco_ref_get_temp(dest));
+               }
+
+               /* Copies are collected (including trivial ones, which
+                * emit_copies() skips) and emitted together once all of
+                * the vec sources have been processed.
+                */
+               pco_copy copy = {
+                  .src = src,
+                  .dest = dest,
+                  .s1 = pco_ref_is_reg(src) &&
+                        pco_ref_get_reg_class(src) == PCO_REG_CLASS_SPEC,
+               };
+
+               util_dynarray_append(&copies, pco_copy, copy);
             }
 
@@ -586,6 +778,11 @@ static bool pco_ra_func(pco_func *func,
             offset += pco_ref_get_chans(*psrc);
          }
 
+         /* Emit copies. */
+         emit_copies(&b, &copies, exec_cnd, highest_temp, lowest_temp);
+
+         util_dynarray_fini(&copies);
+
          pco_instr_delete(instr);
          continue;
       } else if (instr->op == PCO_OP_COMP) {