From 2ed5aa49a50f4b2fb68540ccb5fc82213d0ee8dc Mon Sep 17 00:00:00 2001 From: Simon Perretta Date: Tue, 4 Feb 2025 21:04:00 +0000 Subject: [PATCH] pco: branching fence support, simple ditr insertion logic Signed-off-by: Simon Perretta Acked-by: Erik Faye-Lund Part-of: --- src/imagination/pco/pco_internal.h | 12 ++++++- src/imagination/pco/pco_map.py | 51 +++++++++++++++++++++++++++++ src/imagination/pco/pco_ops.py | 4 +++ src/imagination/pco/pco_trans_nir.c | 13 ++++++++ 4 files changed, 79 insertions(+), 1 deletion(-) diff --git a/src/imagination/pco/pco_internal.h b/src/imagination/pco/pco_internal.h index 0b4b8c231e1..da51f533141 100644 --- a/src/imagination/pco/pco_internal.h +++ b/src/imagination/pco/pco_internal.h @@ -2887,6 +2887,14 @@ static inline unsigned pco_branch_rel_offset(pco_igrp *br, pco_cf_node *cf_node) return pco_cf_node_offset(cf_node) - pco_igrp_offset(br); } +static inline unsigned pco_branch_rel_offset_next_igrp(pco_igrp *br) +{ + pco_igrp *next_igrp = pco_next_igrp(br); + assert(next_igrp); + + return pco_igrp_offset(next_igrp) - pco_igrp_offset(br); +} + static inline bool pco_should_skip_pass(const char *pass) { return comma_separated_list_contains(pco_skip_passes, pass); @@ -2913,7 +2921,7 @@ static inline bool pco_should_skip_pass(const char *pass) } \ } while (0) -/* Common hw constants. */ +/* Common hw constants/references. */ /** Integer/float zero. */ #define pco_zero pco_ref_hwreg(0, PCO_REG_CLASS_CONST) @@ -2945,6 +2953,8 @@ static inline bool pco_should_skip_pass(const char *pass) /** Float infinity. */ #define pco_finf pco_ref_hwreg(142, PCO_REG_CLASS_CONST) +#define pco_p0 pco_ref_pred(PCO_PRED_P0) + /* Printing. */ void pco_print_ref(pco_shader *shader, pco_ref ref); void pco_print_instr(pco_shader *shader, pco_instr *instr); diff --git a/src/imagination/pco/pco_map.py b/src/imagination/pco/pco_map.py index 29b2beafd2d..19af1b6d7e7 100644 --- a/src/imagination/pco/pco_map.py +++ b/src/imagination/pco/pco_map.py @@ -402,6 +402,8 @@ def encode_map(op, encodings, op_ref_maps): assert op.has_target_cf_node encode_variant += f'pco_branch_rel_offset({{1}}->parent_igrp, {{1}}->target_cf_node)' + elif val_spec == 'target_next_igrp': + encode_variant += f'pco_branch_rel_offset_next_igrp({{1}}->parent_igrp)' else: assert struct_field.type.base_type == BaseType.enum @@ -1690,6 +1692,18 @@ encode_map(O_BR, op_ref_maps=[('ctrl', [], [])] ) +encode_map(O_BR_NEXT, + encodings=[ + (I_BRANCH, [ + ('link', False), + ('bpred', 'cc'), + ('abs', False), + ('offset', 'target_next_igrp') + ]) + ], + op_ref_maps=[('ctrl', [], [])] +) + encode_map(O_MUTEX, encodings=[ (I_MUTEX, [ @@ -2649,6 +2663,31 @@ group_map(O_XCHG_ATOMIC, ] ) +group_map(O_FLUSH_P0, + hdr=(I_IGRP_HDR_MAIN, [ + ('oporg', 'p0_p2'), + ('olchk', False), + ('w1p', False), + ('w0p', False), + ('cc', OM_EXEC_CND), + ('end', OM_END), + ('atom', False), + ('rpt', 1) + ]), + enc_ops=[ + ('0', O_IMADD32, ['ft0'], ['pco_zero', 'pco_zero', 'pco_zero', 'pco_p0'], [(OM_S, False)]), + ('2_tst', O_TST, ['ftt', 'pco_p0'], ['is1', '_'], [(OM_TST_OP_MAIN, 'gzero'), (OM_TST_TYPE_MAIN, 'u32'), (OM_PHASE2END, True)]), + ], + srcs=[ + ('s[0]', ('0', SRC(0)), 's0'), + ('s[1]', ('0', SRC(1)), 's1'), + ('s[2]', ('0', SRC(2)), 's2') + ], + iss=[ + ('is[1]', 'ft0'), + ] +) + group_map(O_UVSW_WRITE, hdr=(I_IGRP_HDR_MAIN, [ ('oporg', 'be'), @@ -3263,6 +3302,18 @@ group_map(O_BR, enc_ops=[('ctrl', O_BR)] ) +group_map(O_BR_NEXT, + hdr=(I_IGRP_HDR_CONTROL, [ + ('olchk', False), + ('w1p', False), + ('w0p', False), + ('cc', OM_EXEC_CND), + ('miscctl', False), + ('ctrlop', 'b') + ]), + enc_ops=[('ctrl', O_BR_NEXT)] +) + group_map(O_MUTEX, hdr=(I_IGRP_HDR_CONTROL, [ ('olchk', False), diff --git a/src/imagination/pco/pco_ops.py b/src/imagination/pco/pco_ops.py index 38a4dd83569..8cbbfa6c6f4 100644 --- a/src/imagination/pco/pco_ops.py +++ b/src/imagination/pco/pco_ops.py @@ -435,6 +435,8 @@ O_CNDEND = hw_op('cndend', [OM_EXEC_CND], 2, 2) O_BR = hw_op('br', [OM_EXEC_CND, OM_BRANCH_CND, OM_LINK], has_target_cf_node=True) +O_BR_NEXT = hw_op('br.next', [OM_EXEC_CND]) + O_MUTEX = hw_op('mutex', [OM_MTX_OP], 0, 1) # Combination (> 1 instructions per group). @@ -458,6 +460,8 @@ O_ST32 = hw_op('st32', OM_ALU_RPT1 + [OM_MCU_CACHE_MODE_ST], 0, 5) O_IADD32_ATOMIC = hw_op('iadd32.atomic', OM_ALU_ATOMEXT + [OM_S], 2, 3, [], [[RM_ABS, RM_NEG], [RM_ABS, RM_NEG]]) O_XCHG_ATOMIC = hw_op('xchg.atomic', OM_ALU_ATOMEXT, 2, 2, [], [[RM_ABS, RM_NEG], [RM_ABS, RM_NEG]]) +O_FLUSH_P0 = hw_op('flush.p0', [OM_EXEC_CND, OM_END]) + # Pseudo-ops (unmapped). O_FNEG = pseudo_op('fneg', OM_ALU, 1, 1) O_FABS = pseudo_op('fabs', OM_ALU, 1, 1) diff --git a/src/imagination/pco/pco_trans_nir.c b/src/imagination/pco/pco_trans_nir.c index 2c390a1593e..1d37167ab70 100644 --- a/src/imagination/pco/pco_trans_nir.c +++ b/src/imagination/pco/pco_trans_nir.c @@ -42,6 +42,13 @@ static pco_block *trans_cf_nodes(trans_ctx *tctx, struct exec_list *cf_node_list, struct exec_list *nir_cf_node_list); +static inline void pco_fence(pco_builder *b) +{ + pco_flush_p0(b); + pco_br_next(b, .exec_cnd = PCO_EXEC_CND_E1_Z1); + pco_br_next(b, .exec_cnd = PCO_EXEC_CND_E1_Z0); +} + /** * \brief Splits a vector destination into scalar components. * @@ -341,6 +348,9 @@ trans_load_input_fs(trans_ctx *tctx, nir_intrinsic_instr *intr, pco_ref dest) PCO_REG_CLASS_COEFF, ROGUE_USC_COEFFICIENT_SET_SIZE); + if (usc_itrsmp_enhanced) + pco_fence(&tctx->b); + return usc_itrsmp_enhanced ? pco_ditrp(&tctx->b, dest, pco_ref_drc(PCO_DRC_0), @@ -367,6 +377,9 @@ trans_load_input_fs(trans_ctx *tctx, nir_intrinsic_instr *intr, pco_ref dest) } case INTERP_MODE_NOPERSPECTIVE: + if (usc_itrsmp_enhanced) + pco_fence(&tctx->b); + return usc_itrsmp_enhanced ? pco_ditr(&tctx->b, dest, pco_ref_drc(PCO_DRC_0),