pco: add support for more bitwise and bitfield ops

Signed-off-by: Simon Perretta <simon.perretta@imgtec.com>
Acked-by: Erik Faye-Lund <erik.faye-lund@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/36412>
This commit is contained in:
Simon Perretta 2025-01-08 16:52:33 +00:00 committed by Marge Bot
parent 8ec174b3f9
commit ad2b623744
5 changed files with 243 additions and 4 deletions

View file

@ -2545,6 +2545,24 @@ static inline enum pco_srcsel pco_ref_srcsel(pco_ref ref)
UNREACHABLE("");
}
/**
 * \brief Translates the I/O of a reference into the matching count source.
 *
 * Only the S2 and FT2 I/Os have a count source equivalent; any other I/O
 * reaches the UNREACHABLE() at the end of the function.
 *
 * \param[in] ref Reference whose I/O is looked up.
 * \return The count source corresponding to the reference's I/O.
 */
static inline enum pco_count_src pco_ref_count_src(pco_ref ref)
{
const enum pco_io ref_io = pco_ref_get_io(ref);

if (ref_io == PCO_IO_S2)
return PCO_COUNT_SRC_S2;

if (ref_io == PCO_IO_FT2)
return PCO_COUNT_SRC_FT2;

/* No other I/O is valid as a count source. */
UNREACHABLE("");
}
/**
* \brief Returns whether none of the lower/upper sources in an instruction
* group are set.

View file

@ -254,7 +254,10 @@ enum_map(OM_SHIFTOP.t, F_SHIFT2_OP, [
('shr', 'shr'),
('rol', 'rol'),
('cps', 'cps'),
('asr', 'asr_twb'),
('asr_twb', 'asr_twb'),
('asr_pwb', 'asr_pwb'),
('asr_mtb', 'asr_mtb'),
('asr_ftb', 'asr_ftb'),
])
enum_map(RM_ELEM.t, F_UPCK_ELEM, [
@ -1365,6 +1368,54 @@ encode_map(O_MSK_BBYP0S1,
op_ref_maps=[('0', ['ft0', 'ft1', 'ft2'], ['s0', 's1', 's2'])]
)
# Encoding map for O_MSK_LSL: uses the I_PHASE0_SRC encoding with a fixed
# count source of 's2', count op 'byp', bitmask source op 'msk' and shift1
# op 'lsl'.
encode_map(O_MSK_LSL,
encodings=[
(I_PHASE0_SRC, [
('count_src', 's2'),
('count_op', 'byp'),
('bitmask_src_op', 'msk'),
('shift1_op', 'lsl')
])
],
# NOTE(review): presumably (phase, allowed dest refs, allowed src refs) for
# the op's three dests and three sources - confirm against encode_map().
op_ref_maps=[('0', ['ft0', 'ft1', 'ft2'], ['s0', 's1', 's2'])]
)
# Encoding map for O_CBS: uses the I_PHASE0_SRC encoding with count op 'cbs'
# while the bitmask source op and shift1 op are both 'byp'.
encode_map(O_CBS,
encodings=[
(I_PHASE0_SRC, [
# The count source field is not fixed; it is computed from source 0 by
# pco_ref_count_src().
('count_src', ('pco_ref_count_src', SRC(0))),
('count_op', 'cbs'),
('bitmask_src_op', 'byp'),
('shift1_op', 'byp')
])
],
# NOTE(review): the nested list presumably means source 0 may be either
# 's2' or 'ft2', with the dest on 'ft3' - confirm against encode_map().
op_ref_maps=[('0', ['ft3'], [['s2', 'ft2']])]
)
# Encoding map for O_FTB: uses the I_PHASE0_SRC encoding with count op 'ftb'
# while the bitmask source op and shift1 op are both 'byp'.
encode_map(O_FTB,
encodings=[
(I_PHASE0_SRC, [
# The count source field is not fixed; it is computed from source 0 by
# pco_ref_count_src().
('count_src', ('pco_ref_count_src', SRC(0))),
('count_op', 'ftb'),
('bitmask_src_op', 'byp'),
('shift1_op', 'byp')
])
],
# NOTE(review): the nested list presumably means source 0 may be either
# 's2' or 'ft2', with the dest on 'ft3' - confirm against encode_map().
op_ref_maps=[('0', ['ft3'], [['s2', 'ft2']])]
)
# Encoding map for O_REV: uses the I_PHASE0_SRC encoding with a fixed count
# source of 'ft2', count op and bitmask source op both 'byp', and shift1 op
# 'rev'.
encode_map(O_REV,
encodings=[
(I_PHASE0_SRC, [
('count_src', 'ft2'),
('count_op', 'byp'),
('bitmask_src_op', 'byp'),
('shift1_op', 'rev')
])
],
# NOTE(review): presumably restricts the dest to 'ft2' and the source to
# 's2' - confirm against encode_map().
op_ref_maps=[('0', ['ft2'], ['s2'])]
)
encode_map(O_LOGICAL,
encodings=[
(I_PHASE1, [
@ -1385,7 +1436,7 @@ encode_map(O_SHIFT,
('shift2_op', OM_SHIFTOP),
])
],
op_ref_maps=[('2', ['ft5'], ['ft4', 's4', ['_', 'p0']])]
op_ref_maps=[('2', ['ft5'], ['ft4', 's4', ['_', 'ft0', 'ft3']])]
)
encode_map(O_WOP, encodings=[(I_WOP, [])], op_ref_maps=[('ctrl', [], [])])
@ -2499,6 +2550,54 @@ group_map(O_MOVI32,
dests=[('w[0]', ('0', DEST(1)), 'ft1')]
)
# Instruction group mapping for O_CBS: a bitwise group (I_IGRP_HDR_BITWISE)
# containing the single encoded op O_CBS in slot '0'.
group_map(O_CBS,
hdr=(I_IGRP_HDR_BITWISE, [
('opcnt', 'p0'),
('olchk', OM_OLCHK),
# Header is set up with w1p enabled and w0p disabled; the dest below uses
# w[1].
('w1p', True),
('w0p', False),
('cc', OM_EXEC_CND),
('end', OM_END),
('atom', OM_ATOM),
('rpt', OM_RPT)
]),
enc_ops=[('0', O_CBS)],
# NOTE(review): presumably op '0' source 0 is supplied through s[2] (as
# 's2') and its dest 0 is written out through w[1] from 'ft3' - confirm
# against group_map().
srcs=[('s[2]', ('0', SRC(0)), 's2')],
dests=[('w[1]', ('0', DEST(0)), 'ft3')]
)
# Instruction group mapping for O_FTB: a bitwise group (I_IGRP_HDR_BITWISE)
# containing the single encoded op O_FTB in slot '0'.
group_map(O_FTB,
hdr=(I_IGRP_HDR_BITWISE, [
('opcnt', 'p0'),
('olchk', OM_OLCHK),
# Header is set up with w1p enabled and w0p disabled; the dest below uses
# w[1].
('w1p', True),
('w0p', False),
('cc', OM_EXEC_CND),
('end', OM_END),
('atom', OM_ATOM),
('rpt', OM_RPT)
]),
enc_ops=[('0', O_FTB)],
# NOTE(review): presumably op '0' source 0 is supplied through s[2] (as
# 's2') and its dest 0 is written out through w[1] from 'ft3' - confirm
# against group_map().
srcs=[('s[2]', ('0', SRC(0)), 's2')],
dests=[('w[1]', ('0', DEST(0)), 'ft3')]
)
# Instruction group mapping for O_REV: a bitwise group (I_IGRP_HDR_BITWISE)
# containing the single encoded op O_REV in slot '0'.
group_map(O_REV,
hdr=(I_IGRP_HDR_BITWISE, [
('opcnt', 'p0'),
('olchk', OM_OLCHK),
# Header is set up with w1p enabled and w0p disabled; the dest below uses
# w[1].
('w1p', True),
('w0p', False),
('cc', OM_EXEC_CND),
('end', OM_END),
('atom', OM_ATOM),
('rpt', OM_RPT)
]),
enc_ops=[('0', O_REV)],
# NOTE(review): presumably op '0' source 0 is supplied through s[2] (as
# 's2') and its dest 0 is written out through w[1] from 'ft2' (unlike
# O_CBS/O_FTB, which use 'ft3') - confirm against group_map().
srcs=[('s[2]', ('0', SRC(0)), 's2')],
dests=[('w[1]', ('0', DEST(0)), 'ft2')]
)
group_map(O_LOGICAL,
hdr=(I_IGRP_HDR_BITWISE, [
('opcnt', ['p0', 'p1']),
@ -2567,6 +2666,82 @@ group_map(O_COPYSIGN,
dests=[('w[0]', ('1', DEST(0)), 'ft4')]
)
# Instruction group mapping for O_IBFE: a bitwise group (I_IGRP_HDR_BITWISE)
# built from three encoded ops in slots '0', '1' and '2'.
group_map(O_IBFE,
hdr=(I_IGRP_HDR_BITWISE, [
('opcnt', ['p0', 'p1', 'p2']),
('olchk', OM_OLCHK),
# Header is set up with w0p enabled and w1p disabled; the dest below uses
# w[0].
('w1p', False),
('w0p', True),
('cc', OM_EXEC_CND),
('end', OM_END),
('atom', OM_ATOM),
('rpt', OM_RPT)
]),
enc_ops=[
# Slot '0': O_MSK_BBYP0S1 producing ft0/ft1/ft2 from the op's sources in
# reversed order (2, 1, 0).
('0', O_MSK_BBYP0S1, ['ft0', 'ft1', 'ft2'], [SRC(2), SRC(1), SRC(0)]),
# Slot '1': O_LOGICAL with the 'or' logiop, writing ft4.
('1', O_LOGICAL, ['ft4'], ['ft1', 'ft2', 'ft1_invert', 'pco_zero'], [(OM_LOGIOP, 'or')]),
# Slot '2': O_SHIFT with the 'asr_mtb' shiftop on ft4, writing the final
# dest; its third source is ft0 (O_UBFE passes '_' here instead).
('2', O_SHIFT, [DEST(0)], ['ft4', SRC(1), 'ft0'], [(OM_SHIFTOP, 'asr_mtb')])
],
# NOTE(review): each entry is presumably (group source, (slot, op source),
# I/O) - confirm against group_map().
srcs=[
('s[0]', ('0', SRC(0)), 's0'),
('s[1]', ('0', SRC(1)), 's1'),
('s[2]', ('0', SRC(2)), 's2'),
('s[3]', ('1', SRC(3)), 's3'),
('s[4]', ('2', SRC(1)), 's4')
],
dests=[('w[0]', ('2', DEST(0)), 'ft5')]
)
# Instruction group mapping for O_UBFE: a bitwise group (I_IGRP_HDR_BITWISE)
# built from three encoded ops in slots '0', '1' and '2'.
group_map(O_UBFE,
hdr=(I_IGRP_HDR_BITWISE, [
('opcnt', ['p0', 'p1', 'p2']),
('olchk', OM_OLCHK),
# Header is set up with w0p enabled and w1p disabled; the dest below uses
# w[0].
('w1p', False),
('w0p', True),
('cc', OM_EXEC_CND),
('end', OM_END),
('atom', OM_ATOM),
('rpt', OM_RPT)
]),
enc_ops=[
# Slot '0': O_MSK_BBYP0S1 producing ft0/ft1/ft2 from the op's sources in
# reversed order (2, 1, 0).
('0', O_MSK_BBYP0S1, ['ft0', 'ft1', 'ft2'], [SRC(2), SRC(1), SRC(0)]),
# Slot '1': O_LOGICAL with the 'or' logiop, writing ft4.
('1', O_LOGICAL, ['ft4'], ['ft1', 'ft2', 'ft1_invert', 'pco_zero'], [(OM_LOGIOP, 'or')]),
# Slot '2': O_SHIFT with the 'shr' shiftop on ft4, writing the final dest;
# its third source is left as '_' (O_IBFE passes ft0 here instead).
('2', O_SHIFT, [DEST(0)], ['ft4', SRC(1), '_'], [(OM_SHIFTOP, 'shr')])
],
# NOTE(review): each entry is presumably (group source, (slot, op source),
# I/O) - confirm against group_map().
srcs=[
('s[0]', ('0', SRC(0)), 's0'),
('s[1]', ('0', SRC(1)), 's1'),
('s[2]', ('0', SRC(2)), 's2'),
('s[3]', ('1', SRC(3)), 's3'),
('s[4]', ('2', SRC(1)), 's4')
],
dests=[('w[0]', ('2', DEST(0)), 'ft5')]
)
# Instruction group mapping for O_BFI: a bitwise group (I_IGRP_HDR_BITWISE)
# built from two encoded ops in slots '0' and '1'.
group_map(O_BFI,
hdr=(I_IGRP_HDR_BITWISE, [
('opcnt', ['p0', 'p1']),
('olchk', OM_OLCHK),
# Header is set up with w0p enabled and w1p disabled; the dest below uses
# w[0].
('w1p', False),
('w0p', True),
('cc', OM_EXEC_CND),
('end', OM_END),
('atom', OM_ATOM),
('rpt', OM_RPT)
]),
enc_ops=[
# Slot '0': O_MSK_LSL producing ft0/ft1/ft2 from the op's sources 3, 2
# and 1 (in that order).
('0', O_MSK_LSL, ['ft0', 'ft1', 'ft2'], [SRC(3), SRC(2), SRC(1)]),
# Slot '1': O_LOGICAL with the 'or' logiop, writing the final dest; the
# op's source 0 is passed as the fourth logical source.
('1', O_LOGICAL, [DEST(0)], ['ft1', 'ft2', 'ft1_invert', SRC(0)], [(OM_LOGIOP, 'or')]),
],
# NOTE(review): each entry is presumably (group source, (slot, op source),
# I/O) - confirm against group_map().
srcs=[
('s[0]', ('0', SRC(0)), 's0'),
('s[1]', ('0', SRC(1)), 's1'),
('s[2]', ('0', SRC(2)), 's2'),
('s[3]', ('1', SRC(3)), 's3'),
],
dests=[('w[0]', ('1', DEST(0)), 'ft4')]
)
group_map(O_WOP,
hdr=(I_IGRP_HDR_CONTROL, [
('olchk', False),

View file

@ -38,12 +38,14 @@ static const nir_shader_compiler_options nir_options = {
.lower_fdiv = true,
.lower_ffract = true,
.lower_find_lsb = true,
.lower_fquantize2f16 = true,
.lower_flrp32 = true,
.lower_fmod = true,
.lower_fpow = true,
.lower_fsqrt = true,
.lower_ftrunc = true,
.lower_ifind_msb = true,
.lower_ldexp = true,
.lower_layer_fs_input_to_sysval = true,
.compact_arrays = true,

View file

@ -290,7 +290,10 @@ OM_SHIFTOP = op_mod_enum('shiftop', [
'shr',
'rol',
'cps',
'asr',
'asr_twb',
'asr_pwb',
'asr_mtb',
'asr_ftb',
])
OM_CND = op_mod_enum('cnd', [
@ -363,15 +366,23 @@ O_ATOMIC = hw_op('atomic', [OM_OLCHK, OM_EXEC_CND, OM_END, OM_ATOM_OP], 1, 2)
## Bitwise.
# NOTE(review): the two trailing integers presumably give the number of dests
# and sources of each op, in that order - confirm against hw_op() and
# hw_direct_op().
O_MOVI32 = hw_op('movi32', OM_ALU, 1, 1)
# Single-source ops (count/find/reverse style operations on one input).
O_CBS = hw_op('cbs', OM_ALU, 1, 1)
O_FTB = hw_op('ftb', OM_ALU, 1, 1)
O_REV = hw_op('rev', OM_ALU, 1, 1)
# Ops that carry an extra op mod (OM_LOGIOP / OM_SHIFTOP) on top of OM_ALU.
O_LOGICAL = hw_op('logical', OM_ALU + [OM_LOGIOP], 1, 4)
O_SHIFT = hw_op('shift', OM_ALU + [OM_SHIFTOP], 1, 3)
O_COPYSIGN = hw_op('copysign', OM_ALU, 1, 2)
# Bitfield extract (signed/unsigned variants) and bitfield insert.
O_IBFE = hw_op('ibfe', OM_ALU, 1, 3)
O_UBFE = hw_op('ubfe', OM_ALU, 1, 3)
O_BFI = hw_op('bfi', OM_ALU, 1, 4)
# Direct ops, declared with an empty op mod list.
O_BBYP0BM = hw_direct_op('bbyp0bm', [], 2, 2)
O_BBYP0BM_IMM32 = hw_direct_op('bbyp0bm_imm32', [], 2, 2)
O_BBYP0S1 = hw_direct_op('bbyp0s1', [], 1, 1)
O_MSK_BBYP0S1 = hw_direct_op('msk_bbyp0s1', [], 3, 3)
O_MSK_LSL = hw_direct_op('msk_lsl', [], 3, 3)
## Control.
O_WOP = hw_op('wop')

View file

@ -1127,7 +1127,7 @@ static pco_instr *trans_shift(trans_ctx *tctx,
break;
case nir_op_ishr:
shiftop = PCO_SHIFTOP_ASR;
shiftop = PCO_SHIFTOP_ASR_TWB;
break;
case nir_op_ushr:
@ -1454,6 +1454,39 @@ static pco_instr *trans_alu(trans_ctx *tctx, nir_alu_instr *alu)
instr = trans_shift(tctx, alu->op, dest, src[0], src[1]);
break;
case nir_op_bit_count:
instr = pco_cbs(&tctx->b, dest, src[0]);
break;
case nir_op_ufind_msb:
instr = pco_ftb(&tctx->b, dest, src[0]);
break;
case nir_op_ibitfield_extract: {
pco_ref bfe = pco_ref_new_ssa32(tctx->func);
pco_ibfe(&tctx->b, bfe, src[0], src[1], src[2]);
instr = pco_bcsel(&tctx->b, dest, src[2], bfe, pco_zero);
break;
}
case nir_op_ubitfield_extract: {
pco_ref bfe = pco_ref_new_ssa32(tctx->func);
pco_ubfe(&tctx->b, bfe, src[0], src[1], src[2]);
instr = pco_bcsel(&tctx->b, dest, src[2], bfe, pco_zero);
break;
}
case nir_op_bitfield_insert: {
pco_ref bfi = pco_ref_new_ssa32(tctx->func);
pco_bfi(&tctx->b, bfi, src[0], src[1], src[2], src[3]);
instr = pco_bcsel(&tctx->b, dest, src[3], bfi, src[0]);
break;
}
case nir_op_bitfield_reverse:
instr = pco_rev(&tctx->b, dest, src[0]);
break;
case nir_op_f2i32:
instr = pco_pck(&tctx->b,
dest,