pco: branching fence support, simple ditr insertion logic

Signed-off-by: Simon Perretta <simon.perretta@imgtec.com>
Acked-by: Erik Faye-Lund <erik.faye-lund@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/36412>
This commit is contained in:
Simon Perretta 2025-02-04 21:04:00 +00:00 committed by Marge Bot
parent 8baa387270
commit 2ed5aa49a5
4 changed files with 79 additions and 1 deletions

View file

@ -2887,6 +2887,14 @@ static inline unsigned pco_branch_rel_offset(pco_igrp *br, pco_cf_node *cf_node)
return pco_cf_node_offset(cf_node) - pco_igrp_offset(br);
}
/**
 * \brief Returns the relative offset from a branch igrp to the igrp after it.
 *
 * The igrp following the branch must exist; this is asserted.
 *
 * \param[in] br The branch instruction group.
 * \return The offset of the next igrp minus the offset of \p br.
 */
static inline unsigned pco_branch_rel_offset_next_igrp(pco_igrp *br)
{
   pco_igrp *following = pco_next_igrp(br);
   assert(following);

   const unsigned following_offset = pco_igrp_offset(following);
   const unsigned branch_offset = pco_igrp_offset(br);

   return following_offset - branch_offset;
}
static inline bool pco_should_skip_pass(const char *pass)
{
return comma_separated_list_contains(pco_skip_passes, pass);
@ -2913,7 +2921,7 @@ static inline bool pco_should_skip_pass(const char *pass)
} \
} while (0)
/* Common hw constants. */
/* Common hw constants/references. */
/** Integer/float zero. */
#define pco_zero pco_ref_hwreg(0, PCO_REG_CLASS_CONST)
@ -2945,6 +2953,8 @@ static inline bool pco_should_skip_pass(const char *pass)
/** Float infinity. */
#define pco_finf pco_ref_hwreg(142, PCO_REG_CLASS_CONST)
/** Predicate reference for PCO_PRED_P0. */
#define pco_p0 pco_ref_pred(PCO_PRED_P0)
/* Printing. */
void pco_print_ref(pco_shader *shader, pco_ref ref);
void pco_print_instr(pco_shader *shader, pco_instr *instr);

View file

@ -402,6 +402,8 @@ def encode_map(op, encodings, op_ref_maps):
assert op.has_target_cf_node
encode_variant += f'pco_branch_rel_offset({{1}}->parent_igrp, {{1}}->target_cf_node)'
elif val_spec == 'target_next_igrp':
encode_variant += f'pco_branch_rel_offset_next_igrp({{1}}->parent_igrp)'
else:
assert struct_field.type.base_type == BaseType.enum
@ -1690,6 +1692,18 @@ encode_map(O_BR,
op_ref_maps=[('ctrl', [], [])]
)
# Encoding for br.next: uses the I_BRANCH encoding with link and abs disabled,
# the branch predicate taken from 'cc', and the offset computed via the
# 'target_next_igrp' value spec rather than from a target cf node.
encode_map(O_BR_NEXT,
encodings=[
(I_BRANCH, [
('link', False),
('bpred', 'cc'),
('abs', False),
('offset', 'target_next_igrp')
])
],
op_ref_maps=[('ctrl', [], [])]
)
encode_map(O_MUTEX,
encodings=[
(I_MUTEX, [
@ -2649,6 +2663,31 @@ group_map(O_XCHG_ATOMIC,
]
)
# Group mapping for flush.p0: a main-header group (oporg 'p0_p2') that expands
# to two encoded ops:
#   - phase '0': imadd32 writing ft0, with pco_zero as its first three sources
#     and pco_p0 as the fourth;
#   - phase '2_tst': tst reading is1 and writing ftt and pco_p0, using the
#     'gzero' test op on 'u32' data, and marked as ending phase 2.
# The exec condition and end flag come from the op's own modifiers.
# NOTE(review): is[1] is set to 'ft0', presumably so the tst consumes the
# imadd32 result and p0 is rewritten from its own value - confirm against the
# hardware documentation.
group_map(O_FLUSH_P0,
hdr=(I_IGRP_HDR_MAIN, [
('oporg', 'p0_p2'),
('olchk', False),
('w1p', False),
('w0p', False),
('cc', OM_EXEC_CND),
('end', OM_END),
('atom', False),
('rpt', 1)
]),
enc_ops=[
('0', O_IMADD32, ['ft0'], ['pco_zero', 'pco_zero', 'pco_zero', 'pco_p0'], [(OM_S, False)]),
('2_tst', O_TST, ['ftt', 'pco_p0'], ['is1', '_'], [(OM_TST_OP_MAIN, 'gzero'), (OM_TST_TYPE_MAIN, 'u32'), (OM_PHASE2END, True)]),
],
srcs=[
('s[0]', ('0', SRC(0)), 's0'),
('s[1]', ('0', SRC(1)), 's1'),
('s[2]', ('0', SRC(2)), 's2')
],
iss=[
('is[1]', 'ft0'),
]
)
group_map(O_UVSW_WRITE,
hdr=(I_IGRP_HDR_MAIN, [
('oporg', 'be'),
@ -3263,6 +3302,18 @@ group_map(O_BR,
enc_ops=[('ctrl', O_BR)]
)
# Group mapping for br.next: a control-header group with ctrlop 'b' whose only
# encoded op is O_BR_NEXT in the 'ctrl' slot. The exec condition is taken from
# the op's OM_EXEC_CND modifier; the remaining header fields are fixed to False.
group_map(O_BR_NEXT,
hdr=(I_IGRP_HDR_CONTROL, [
('olchk', False),
('w1p', False),
('w0p', False),
('cc', OM_EXEC_CND),
('miscctl', False),
('ctrlop', 'b')
]),
enc_ops=[('ctrl', O_BR_NEXT)]
)
group_map(O_MUTEX,
hdr=(I_IGRP_HDR_CONTROL, [
('olchk', False),

View file

@ -435,6 +435,8 @@ O_CNDEND = hw_op('cndend', [OM_EXEC_CND], 2, 2)
O_BR = hw_op('br', [OM_EXEC_CND, OM_BRANCH_CND, OM_LINK], has_target_cf_node=True)
# 'br.next' hardware op: accepts only the exec-condition modifier and, unlike
# 'br', is not declared with has_target_cf_node.
O_BR_NEXT = hw_op('br.next', [OM_EXEC_CND])
O_MUTEX = hw_op('mutex', [OM_MTX_OP], 0, 1)
# Combination (> 1 instructions per group).
@ -458,6 +460,8 @@ O_ST32 = hw_op('st32', OM_ALU_RPT1 + [OM_MCU_CACHE_MODE_ST], 0, 5)
O_IADD32_ATOMIC = hw_op('iadd32.atomic', OM_ALU_ATOMEXT + [OM_S], 2, 3, [], [[RM_ABS, RM_NEG], [RM_ABS, RM_NEG]])
O_XCHG_ATOMIC = hw_op('xchg.atomic', OM_ALU_ATOMEXT, 2, 2, [], [[RM_ABS, RM_NEG], [RM_ABS, RM_NEG]])
# 'flush.p0' hardware op: accepts the exec-condition and end modifiers; no
# destination/source counts are passed to hw_op.
O_FLUSH_P0 = hw_op('flush.p0', [OM_EXEC_CND, OM_END])
# Pseudo-ops (unmapped).
O_FNEG = pseudo_op('fneg', OM_ALU, 1, 1)
O_FABS = pseudo_op('fabs', OM_ALU, 1, 1)

View file

@ -42,6 +42,13 @@ static pco_block *trans_cf_nodes(trans_ctx *tctx,
struct exec_list *cf_node_list,
struct exec_list *nir_cf_node_list);
/**
 * \brief Emits a fence sequence using the given builder.
 *
 * The sequence is a flush.p0 followed by two br.next instructions, one with
 * the E1_Z1 exec condition and one with E1_Z0.
 *
 * NOTE(review): presumably the two branches use complementary exec conditions
 * so that every instance takes one of them - confirm against the hardware
 * documentation.
 *
 * \param[in,out] b The builder the instructions are emitted through.
 */
static inline void pco_fence(pco_builder *b)
{
/* Order matters: flush p0 first, then the two branches to the next igrp. */
pco_flush_p0(b);
pco_br_next(b, .exec_cnd = PCO_EXEC_CND_E1_Z1);
pco_br_next(b, .exec_cnd = PCO_EXEC_CND_E1_Z0);
}
/**
* \brief Splits a vector destination into scalar components.
*
@ -341,6 +348,9 @@ trans_load_input_fs(trans_ctx *tctx, nir_intrinsic_instr *intr, pco_ref dest)
PCO_REG_CLASS_COEFF,
ROGUE_USC_COEFFICIENT_SET_SIZE);
if (usc_itrsmp_enhanced)
pco_fence(&tctx->b);
return usc_itrsmp_enhanced ? pco_ditrp(&tctx->b,
dest,
pco_ref_drc(PCO_DRC_0),
@ -367,6 +377,9 @@ trans_load_input_fs(trans_ctx *tctx, nir_intrinsic_instr *intr, pco_ref dest)
}
case INTERP_MODE_NOPERSPECTIVE:
if (usc_itrsmp_enhanced)
pco_fence(&tctx->b);
return usc_itrsmp_enhanced ? pco_ditr(&tctx->b,
dest,
pco_ref_drc(PCO_DRC_0),