pco: add native u{add,sub}{carry,borrow,sat} ops

Implements ops without needing the NIR lowering.
The sum and carry parts can later be combined into a single instruction.

Signed-off-by: Simon Perretta <simon.perretta@imgtec.com>
Tested-by: Icenowy Zheng <zhengxingda@iscas.ac.cn>
Acked-by: Frank Binns <frank.binns@imgtec.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/40607>
This commit is contained in:
Simon Perretta 2026-03-24 11:53:45 +00:00 committed by Marge Bot
parent 181611786c
commit 49b1500bac
5 changed files with 97 additions and 5 deletions

View file

@ -3164,8 +3164,9 @@ static inline bool pco_should_skip_pass(const char *pass)
/** Integer 31. */
#define pco_31 pco_ref_hwreg(31, PCO_REG_CLASS_CONST)
/** Integer -1/true/0xffffffff. */
/** Integer -1/true/0xffffffff/u32max. */
#define pco_true pco_ref_hwreg(143, PCO_REG_CLASS_CONST)
/** Alias of pco_true, for call sites that use the value as the unsigned 32-bit maximum. */
#define pco_u32max pco_true
/** Float 1. */
#define pco_fone pco_ref_hwreg(64, PCO_REG_CLASS_CONST)

View file

@ -2689,6 +2689,75 @@ group_map(O_IMUL32,
]
)
# Instruction-group mapping for O_UADD_CARRY (2 dests, 2 srcs).
#
# Data flow as wired below:
#   op '0'    : ADD64_32 of (SRC(0), pco_zero) and SRC(1) -> ft0, fte
#   op '1'    : MBYP of pco_one                           -> ft1
#   '2_tst'   : TST 'gzero' (type u32) on is1 (= fte)     -> ftt
#   '2_pck'   : PCK with format 'zero'                    -> ft2
#   '2_mov'   : MOVC on ftt writes DEST(0) from ft0/is4 and DEST(1) from ft1/is5
#
# NOTE(review): presumably ft0/fte are the low/high 32 bits of the 64-bit sum,
# so DEST(0) is the wrapped sum and DEST(1) is 1 on carry-out, else 0 -
# confirm against the ADD64_32/MOVC definitions.
group_map(O_UADD_CARRY,
hdr=(I_IGRP_HDR_MAIN, [
('oporg', 'p0_p1_p2'),
('olchk', OM_OLCHK),
# Each write port is only enabled when the matching destination is non-null,
# so callers may request just the sum or just the carry.
('w1p', ('!pco_ref_is_null', DEST(1))),
('w0p', ('!pco_ref_is_null', DEST(0))),
('cc', OM_EXEC_CND),
('end', OM_END),
('atom', OM_ATOM),
('rpt', OM_RPT)
]),
enc_ops=[
# 64-bit + 32-bit add; the second source of the 64-bit operand is pco_zero.
('0', O_ADD64_32, ['ft0', 'fte'], [SRC(0), 'pco_zero', SRC(1), '_'], [(OM_S, False)]),
# Pass the constant one through to ft1 (selected by MOVC for DEST(1)).
('1', O_MBYP, ['ft1'], ['pco_one']),
# Test is1 (routed from fte below) with 'gzero' as u32; result goes to ftt.
('2_tst', O_TST, ['ftt', '_'], ['is1', '_'], [(OM_TST_OP_MAIN, 'gzero'), (OM_TST_TYPE_MAIN, 'u32')]),
# Produce ft2 using the 'zero' pack format (selected by MOVC via is5).
('2_pck', O_PCK, ['ft2'], ['_'], [(OM_PCK_FMT, 'zero')]),
# is4 is also ft0, so both MOVC choices for DEST(0) carry the same value;
# DEST(1) chooses between ft1 (pco_one) and is5 (ft2).
('2_mov', O_MOVC, [DEST(0), DEST(1)], ['ftt', 'ft0', 'is4', 'ft1', 'is5'])
],
srcs=[
# s0..s2 feed the ADD64_32 operands; s3 feeds the MBYP constant.
('s[0]', ('0', SRC(0)), 's0'),
('s[1]', ('0', SRC(1)), 's1'),
('s[2]', ('0', SRC(2)), 's2'),
('s[3]', ('1', SRC(0)), 's3'),
],
iss=[
# Internal source selector routing used by the ops above.
('is[0]', 's3'),
('is[1]', 'fte'),
('is[4]', 'ft0'),
('is[5]', 'ft2'),
],
dests=[
('w[0]', ('2_mov', DEST(0)), 'w0'),
('w[1]', ('2_mov', DEST(1)), 'w1'),
]
)
# Instruction-group mapping for O_UADD_SAT (1 dest, 3 srcs).
#
# Data flow as wired below:
#   op '0'    : ADD64_32 of (SRC(0), pco_zero) and SRC(1) -> ft0, fte
#   op '1'    : MBYP of SRC(2)                            -> ft1
#   '2_tst'   : TST 'gzero' (type u32) on is1 (= fte)     -> ftt
#   '2_mov'   : MOVC on ftt writes DEST(0) from ft1/is4 (is4 = ft0)
#
# NOTE(review): presumably fte is the high half of the 64-bit sum, so DEST(0)
# becomes SRC(2) (the saturation value) when the add overflows 32 bits and the
# plain sum otherwise - confirm against the ADD64_32/MOVC definitions.
# trans_alu passes pco_u32max for uadd_sat and pco_zero for usub_sat as SRC(2).
group_map(O_UADD_SAT,
hdr=(I_IGRP_HDR_MAIN, [
('oporg', 'p0_p1_p2'),
('olchk', OM_OLCHK),
# Only one destination exists, so w0 is always written and w1 never is.
('w1p', False),
('w0p', True),
('cc', OM_EXEC_CND),
('end', OM_END),
('atom', OM_ATOM),
('rpt', OM_RPT)
]),
enc_ops=[
# 64-bit + 32-bit add; the second source of the 64-bit operand is pco_zero.
('0', O_ADD64_32, ['ft0', 'fte'], [SRC(0), 'pco_zero', SRC(1), '_']),
# Pass the caller-supplied third source through to ft1.
('1', O_MBYP, ['ft1'], [SRC(2)]),
# Test is1 (routed from fte below) with 'gzero' as u32; result goes to ftt.
# OM_PHASE2END is set here; no PCK op follows the test in this mapping.
('2_tst', O_TST, ['ftt', '_'], ['is1', '_'], [(OM_TST_OP_MAIN, 'gzero'), (OM_TST_TYPE_MAIN, 'u32'), (OM_PHASE2END, True)]),
# DEST(0) chooses between ft1 (SRC(2)) and is4 (ft0); second output unused.
('2_mov', O_MOVC, [DEST(0), '_'], ['ftt', 'ft1', 'is4', '_', '_'])
],
srcs=[
# s0..s2 feed the ADD64_32 operands; s3 feeds the MBYP source.
('s[0]', ('0', SRC(0)), 's0'),
('s[1]', ('0', SRC(1)), 's1'),
('s[2]', ('0', SRC(2)), 's2'),
('s[3]', ('1', SRC(0)), 's3'),
],
iss=[
# Internal source selector routing used by the ops above.
('is[0]', 's3'),
('is[1]', 'fte'),
('is[4]', 'ft0'),
],
dests=[
('w[0]', ('2_mov', DEST(0)), 'w0'),
]
)
group_map(O_TSTZ,
hdr=(I_IGRP_HDR_MAIN, [
('oporg', 'p0_p2'),

View file

@ -53,12 +53,9 @@ static const nir_shader_compiler_options nir_options = {
.lower_fpow = true,
.lower_fsqrt = true,
.lower_ftrunc = true,
.lower_iadd_sat = true,
.lower_ifind_msb = true,
.lower_layer_fs_input_to_sysval = true,
.lower_uadd_carry = true,
.lower_uadd_sat = true,
.lower_usub_borrow = true,
.lower_usub_sat = true,
.lower_mul_2x32_64 = true,
.compact_arrays = true,
.scalarize_ddx = true,

View file

@ -489,6 +489,9 @@ O_MAX = hw_op('max', OM_ALU + [OM_TST_TYPE_MAIN], 1, 2, [], [[RM_ABS, RM_NEG], [
O_IADD32 = hw_op('iadd32', OM_ALU + [OM_S], 1, 3, [], [[RM_ABS, RM_NEG], [RM_ABS, RM_NEG]])
O_IMUL32 = hw_op('imul32', OM_ALU + [OM_S], 1, 3, [], [[RM_ABS, RM_NEG], [RM_ABS, RM_NEG]])
# Unsigned add with carry. NOTE(review): assuming the trailing integers are the
# dest and src counts (2 dests, 2 srcs), which matches its group_map usage of
# DEST(0), DEST(1), SRC(0) and SRC(1). No per-source modifier list is given.
O_UADD_CARRY = hw_op('uadd_carry', OM_ALU, 2, 2)
# Unsigned saturating add. NOTE(review): assuming 1 dest and 3 srcs; the third
# source is passed through as the alternative result in its group_map.
O_UADD_SAT = hw_op('uadd_sat', OM_ALU, 1, 3)
O_TSTZ = hw_op('tstz', OM_ALU + [OM_TST_TYPE_MAIN], 2, 1, [], [[RM_ELEM]])
O_ST32 = hw_op('st32', OM_ALU_RPT1 + [OM_MCU_CACHE_MODE_ST, OM_IDF], 0, 5)
O_ST32_REGBL = hw_op('st32.regbl', OM_ALU_RPT1 + [OM_MCU_CACHE_MODE_ST, OM_IDF], 0, 5)

View file

@ -3201,6 +3201,28 @@ static pco_instr *trans_alu(trans_ctx *tctx, nir_alu_instr *alu)
instr = pco_iadd32(&tctx->b, dest, src[0], src[1], pco_ref_null());
break;
/* TODO: PCO pass to combine u{add,sub}{carry,borrow}s with the same srcs. */
case nir_op_uadd_carry:
instr = pco_uadd_carry(&tctx->b, pco_ref_null(), dest, src[0], src[1]);
break;
case nir_op_usub_borrow:
instr = pco_uadd_carry(&tctx->b,
pco_ref_null(),
dest,
pco_ref_neg(src[1]),
src[0]);
break;
case nir_op_uadd_sat:
instr = pco_uadd_sat(&tctx->b, dest, src[0], src[1], pco_u32max);
break;
case nir_op_usub_sat:
instr =
pco_uadd_sat(&tctx->b, dest, pco_ref_neg(src[1]), src[0], pco_zero);
break;
case nir_op_uadd64_32: {
pco_ref dest_comps[2] = {
[0] = pco_ref_new_ssa32(tctx->func),