mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-07 11:28:05 +02:00
pco: add native u{add,sub}{carry,borrow,sat} ops
Implements the ops without needing the NIR lowering. The sum and carry parts can later be combined into a single instruction. Signed-off-by: Simon Perretta <simon.perretta@imgtec.com> Tested-by: Icenowy Zheng <zhengxingda@iscas.ac.cn> Acked-by: Frank Binns <frank.binns@imgtec.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/40607>
This commit is contained in:
parent
181611786c
commit
49b1500bac
5 changed files with 97 additions and 5 deletions
|
|
@ -3164,8 +3164,9 @@ static inline bool pco_should_skip_pass(const char *pass)
|
|||
/** Integer 31. */
|
||||
#define pco_31 pco_ref_hwreg(31, PCO_REG_CLASS_CONST)
|
||||
|
||||
/** Integer -1/true/0xffffffff. */
|
||||
/** Integer -1/true/0xffffffff/u32max. */
|
||||
#define pco_true pco_ref_hwreg(143, PCO_REG_CLASS_CONST)
|
||||
/** Alias of pco_true, for use where the value is meant as the u32 maximum (0xffffffff). */
#define pco_u32max pco_true
|
||||
|
||||
/** Float 1. */
|
||||
#define pco_fone pco_ref_hwreg(64, PCO_REG_CLASS_CONST)
|
||||
|
|
|
|||
|
|
@ -2689,6 +2689,75 @@ group_map(O_IMUL32,
|
|||
]
|
||||
)
|
||||
|
||||
group_map(O_UADD_CARRY,
|
||||
hdr=(I_IGRP_HDR_MAIN, [
|
||||
('oporg', 'p0_p1_p2'),
|
||||
('olchk', OM_OLCHK),
|
||||
('w1p', ('!pco_ref_is_null', DEST(1))),
|
||||
('w0p', ('!pco_ref_is_null', DEST(0))),
|
||||
('cc', OM_EXEC_CND),
|
||||
('end', OM_END),
|
||||
('atom', OM_ATOM),
|
||||
('rpt', OM_RPT)
|
||||
]),
|
||||
enc_ops=[
|
||||
('0', O_ADD64_32, ['ft0', 'fte'], [SRC(0), 'pco_zero', SRC(1), '_'], [(OM_S, False)]),
|
||||
('1', O_MBYP, ['ft1'], ['pco_one']),
|
||||
('2_tst', O_TST, ['ftt', '_'], ['is1', '_'], [(OM_TST_OP_MAIN, 'gzero'), (OM_TST_TYPE_MAIN, 'u32')]),
|
||||
('2_pck', O_PCK, ['ft2'], ['_'], [(OM_PCK_FMT, 'zero')]),
|
||||
('2_mov', O_MOVC, [DEST(0), DEST(1)], ['ftt', 'ft0', 'is4', 'ft1', 'is5'])
|
||||
],
|
||||
srcs=[
|
||||
('s[0]', ('0', SRC(0)), 's0'),
|
||||
('s[1]', ('0', SRC(1)), 's1'),
|
||||
('s[2]', ('0', SRC(2)), 's2'),
|
||||
('s[3]', ('1', SRC(0)), 's3'),
|
||||
],
|
||||
iss=[
|
||||
('is[0]', 's3'),
|
||||
('is[1]', 'fte'),
|
||||
('is[4]', 'ft0'),
|
||||
('is[5]', 'ft2'),
|
||||
],
|
||||
dests=[
|
||||
('w[0]', ('2_mov', DEST(0)), 'w0'),
|
||||
('w[1]', ('2_mov', DEST(1)), 'w1'),
|
||||
]
|
||||
)
|
||||
|
||||
group_map(O_UADD_SAT,
|
||||
hdr=(I_IGRP_HDR_MAIN, [
|
||||
('oporg', 'p0_p1_p2'),
|
||||
('olchk', OM_OLCHK),
|
||||
('w1p', False),
|
||||
('w0p', True),
|
||||
('cc', OM_EXEC_CND),
|
||||
('end', OM_END),
|
||||
('atom', OM_ATOM),
|
||||
('rpt', OM_RPT)
|
||||
]),
|
||||
enc_ops=[
|
||||
('0', O_ADD64_32, ['ft0', 'fte'], [SRC(0), 'pco_zero', SRC(1), '_']),
|
||||
('1', O_MBYP, ['ft1'], [SRC(2)]),
|
||||
('2_tst', O_TST, ['ftt', '_'], ['is1', '_'], [(OM_TST_OP_MAIN, 'gzero'), (OM_TST_TYPE_MAIN, 'u32'), (OM_PHASE2END, True)]),
|
||||
('2_mov', O_MOVC, [DEST(0), '_'], ['ftt', 'ft1', 'is4', '_', '_'])
|
||||
],
|
||||
srcs=[
|
||||
('s[0]', ('0', SRC(0)), 's0'),
|
||||
('s[1]', ('0', SRC(1)), 's1'),
|
||||
('s[2]', ('0', SRC(2)), 's2'),
|
||||
('s[3]', ('1', SRC(0)), 's3'),
|
||||
],
|
||||
iss=[
|
||||
('is[0]', 's3'),
|
||||
('is[1]', 'fte'),
|
||||
('is[4]', 'ft0'),
|
||||
],
|
||||
dests=[
|
||||
('w[0]', ('2_mov', DEST(0)), 'w0'),
|
||||
]
|
||||
)
|
||||
|
||||
group_map(O_TSTZ,
|
||||
hdr=(I_IGRP_HDR_MAIN, [
|
||||
('oporg', 'p0_p2'),
|
||||
|
|
|
|||
|
|
@ -53,12 +53,9 @@ static const nir_shader_compiler_options nir_options = {
|
|||
.lower_fpow = true,
|
||||
.lower_fsqrt = true,
|
||||
.lower_ftrunc = true,
|
||||
.lower_iadd_sat = true,
|
||||
.lower_ifind_msb = true,
|
||||
.lower_layer_fs_input_to_sysval = true,
|
||||
.lower_uadd_carry = true,
|
||||
.lower_uadd_sat = true,
|
||||
.lower_usub_borrow = true,
|
||||
.lower_usub_sat = true,
|
||||
.lower_mul_2x32_64 = true,
|
||||
.compact_arrays = true,
|
||||
.scalarize_ddx = true,
|
||||
|
|
|
|||
|
|
@ -489,6 +489,9 @@ O_MAX = hw_op('max', OM_ALU + [OM_TST_TYPE_MAIN], 1, 2, [], [[RM_ABS, RM_NEG], [
|
|||
O_IADD32 = hw_op('iadd32', OM_ALU + [OM_S], 1, 3, [], [[RM_ABS, RM_NEG], [RM_ABS, RM_NEG]])
|
||||
O_IMUL32 = hw_op('imul32', OM_ALU + [OM_S], 1, 3, [], [[RM_ABS, RM_NEG], [RM_ABS, RM_NEG]])
|
||||
|
||||
# Unsigned add with carry-out. The trailing 2, 2 are presumably (num dests, num srcs) - TODO confirm against hw_op().
# The translator passes dest 0 as null and takes the carry/borrow result from dest 1.
O_UADD_CARRY = hw_op('uadd_carry', OM_ALU, 2, 2)
|
||||
# Unsigned saturating add. The trailing 1, 3 are presumably (num dests, num srcs) - TODO confirm against hw_op().
# The third source is the value selected on overflow; the translator passes pco_u32max (uadd_sat) or pco_zero (usub_sat).
O_UADD_SAT = hw_op('uadd_sat', OM_ALU, 1, 3)
|
||||
|
||||
O_TSTZ = hw_op('tstz', OM_ALU + [OM_TST_TYPE_MAIN], 2, 1, [], [[RM_ELEM]])
|
||||
O_ST32 = hw_op('st32', OM_ALU_RPT1 + [OM_MCU_CACHE_MODE_ST, OM_IDF], 0, 5)
|
||||
O_ST32_REGBL = hw_op('st32.regbl', OM_ALU_RPT1 + [OM_MCU_CACHE_MODE_ST, OM_IDF], 0, 5)
|
||||
|
|
|
|||
|
|
@ -3201,6 +3201,28 @@ static pco_instr *trans_alu(trans_ctx *tctx, nir_alu_instr *alu)
|
|||
instr = pco_iadd32(&tctx->b, dest, src[0], src[1], pco_ref_null());
|
||||
break;
|
||||
|
||||
/* TODO: PCO pass to combine u{add,sub}{carry,borrow}s with the same srcs. */
|
||||
case nir_op_uadd_carry:
|
||||
instr = pco_uadd_carry(&tctx->b, pco_ref_null(), dest, src[0], src[1]);
|
||||
break;
|
||||
|
||||
case nir_op_usub_borrow:
|
||||
instr = pco_uadd_carry(&tctx->b,
|
||||
pco_ref_null(),
|
||||
dest,
|
||||
pco_ref_neg(src[1]),
|
||||
src[0]);
|
||||
break;
|
||||
|
||||
case nir_op_uadd_sat:
|
||||
instr = pco_uadd_sat(&tctx->b, dest, src[0], src[1], pco_u32max);
|
||||
break;
|
||||
|
||||
case nir_op_usub_sat:
|
||||
instr =
|
||||
pco_uadd_sat(&tctx->b, dest, pco_ref_neg(src[1]), src[0], pco_zero);
|
||||
break;
|
||||
|
||||
case nir_op_uadd64_32: {
|
||||
pco_ref dest_comps[2] = {
|
||||
[0] = pco_ref_new_ssa32(tctx->func),
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue