diff --git a/src/imagination/pco/pco_internal.h b/src/imagination/pco/pco_internal.h index e06368a0809..f808928c725 100644 --- a/src/imagination/pco/pco_internal.h +++ b/src/imagination/pco/pco_internal.h @@ -2545,6 +2545,24 @@ static inline enum pco_srcsel pco_ref_srcsel(pco_ref ref) UNREACHABLE(""); } +static inline enum pco_count_src pco_ref_count_src(pco_ref ref) +{ + enum pco_io io = pco_ref_get_io(ref); + + switch (io) { + case PCO_IO_S2: + return PCO_COUNT_SRC_S2; + + case PCO_IO_FT2: + return PCO_COUNT_SRC_FT2; + + default: + break; + } + + UNREACHABLE(""); +} + /** * \brief Returns whether none of the lower/upper sources in an instruction * group are set. diff --git a/src/imagination/pco/pco_map.py b/src/imagination/pco/pco_map.py index fbeeda8f541..93e07b5ff48 100644 --- a/src/imagination/pco/pco_map.py +++ b/src/imagination/pco/pco_map.py @@ -254,7 +254,10 @@ enum_map(OM_SHIFTOP.t, F_SHIFT2_OP, [ ('shr', 'shr'), ('rol', 'rol'), ('cps', 'cps'), - ('asr', 'asr_twb'), + ('asr_twb', 'asr_twb'), + ('asr_pwb', 'asr_pwb'), + ('asr_mtb', 'asr_mtb'), + ('asr_ftb', 'asr_ftb'), ]) enum_map(RM_ELEM.t, F_UPCK_ELEM, [ @@ -1365,6 +1368,54 @@ encode_map(O_MSK_BBYP0S1, op_ref_maps=[('0', ['ft0', 'ft1', 'ft2'], ['s0', 's1', 's2'])] ) +encode_map(O_MSK_LSL, + encodings=[ + (I_PHASE0_SRC, [ + ('count_src', 's2'), + ('count_op', 'byp'), + ('bitmask_src_op', 'msk'), + ('shift1_op', 'lsl') + ]) + ], + op_ref_maps=[('0', ['ft0', 'ft1', 'ft2'], ['s0', 's1', 's2'])] +) + +encode_map(O_CBS, + encodings=[ + (I_PHASE0_SRC, [ + ('count_src', ('pco_ref_count_src', SRC(0))), + ('count_op', 'cbs'), + ('bitmask_src_op', 'byp'), + ('shift1_op', 'byp') + ]) + ], + op_ref_maps=[('0', ['ft3'], [['s2', 'ft2']])] +) + +encode_map(O_FTB, + encodings=[ + (I_PHASE0_SRC, [ + ('count_src', ('pco_ref_count_src', SRC(0))), + ('count_op', 'ftb'), + ('bitmask_src_op', 'byp'), + ('shift1_op', 'byp') + ]) + ], + op_ref_maps=[('0', ['ft3'], [['s2', 'ft2']])] +) + +encode_map(O_REV, + encodings=[ + (I_PHASE0_SRC, [ + ('count_src', 'ft2'), + ('count_op', 'byp'), + ('bitmask_src_op', 'byp'), + ('shift1_op', 'rev') + ]) + ], + op_ref_maps=[('0', ['ft2'], ['s2'])] +) + encode_map(O_LOGICAL, encodings=[ (I_PHASE1, [ @@ -1385,7 +1436,7 @@ encode_map(O_SHIFT, ('shift2_op', OM_SHIFTOP), ]) ], - op_ref_maps=[('2', ['ft5'], ['ft4', 's4', ['_', 'p0']])] + op_ref_maps=[('2', ['ft5'], ['ft4', 's4', ['_', 'ft0', 'ft3']])] ) encode_map(O_WOP, encodings=[(I_WOP, [])], op_ref_maps=[('ctrl', [], [])]) @@ -2499,6 +2550,54 @@ group_map(O_MOVI32, dests=[('w[0]', ('0', DEST(1)), 'ft1')] ) +group_map(O_CBS, + hdr=(I_IGRP_HDR_BITWISE, [ + ('opcnt', 'p0'), + ('olchk', OM_OLCHK), + ('w1p', True), + ('w0p', False), + ('cc', OM_EXEC_CND), + ('end', OM_END), + ('atom', OM_ATOM), + ('rpt', OM_RPT) + ]), + enc_ops=[('0', O_CBS)], + srcs=[('s[2]', ('0', SRC(0)), 's2')], + dests=[('w[1]', ('0', DEST(0)), 'ft3')] +) + +group_map(O_FTB, + hdr=(I_IGRP_HDR_BITWISE, [ + ('opcnt', 'p0'), + ('olchk', OM_OLCHK), + ('w1p', True), + ('w0p', False), + ('cc', OM_EXEC_CND), + ('end', OM_END), + ('atom', OM_ATOM), + ('rpt', OM_RPT) + ]), + enc_ops=[('0', O_FTB)], + srcs=[('s[2]', ('0', SRC(0)), 's2')], + dests=[('w[1]', ('0', DEST(0)), 'ft3')] +) + +group_map(O_REV, + hdr=(I_IGRP_HDR_BITWISE, [ + ('opcnt', 'p0'), + ('olchk', OM_OLCHK), + ('w1p', True), + ('w0p', False), + ('cc', OM_EXEC_CND), + ('end', OM_END), + ('atom', OM_ATOM), + ('rpt', OM_RPT) + ]), + enc_ops=[('0', O_REV)], + srcs=[('s[2]', ('0', SRC(0)), 's2')], + dests=[('w[1]', ('0', DEST(0)), 'ft2')] +) + group_map(O_LOGICAL, hdr=(I_IGRP_HDR_BITWISE, [ ('opcnt', ['p0', 'p1']), @@ -2567,6 +2666,82 @@ group_map(O_COPYSIGN, dests=[('w[0]', ('1', DEST(0)), 'ft4')] ) +group_map(O_IBFE, + hdr=(I_IGRP_HDR_BITWISE, [ + ('opcnt', ['p0', 'p1', 'p2']), + ('olchk', OM_OLCHK), + ('w1p', False), + ('w0p', True), + ('cc', OM_EXEC_CND), + ('end', OM_END), + ('atom', OM_ATOM), + ('rpt', OM_RPT) + ]), + enc_ops=[ + ('0', O_MSK_BBYP0S1, ['ft0', 'ft1', 'ft2'], [SRC(2), SRC(1), SRC(0)]), + ('1', O_LOGICAL, ['ft4'], ['ft1', 'ft2', 'ft1_invert', 'pco_zero'], [(OM_LOGIOP, 'or')]), + ('2', O_SHIFT, [DEST(0)], ['ft4', SRC(1), 'ft0'], [(OM_SHIFTOP, 'asr_mtb')]) + ], + srcs=[ + ('s[0]', ('0', SRC(0)), 's0'), + ('s[1]', ('0', SRC(1)), 's1'), + ('s[2]', ('0', SRC(2)), 's2'), + ('s[3]', ('1', SRC(3)), 's3'), + ('s[4]', ('2', SRC(1)), 's4') + ], + dests=[('w[0]', ('2', DEST(0)), 'ft5')] +) + +group_map(O_UBFE, + hdr=(I_IGRP_HDR_BITWISE, [ + ('opcnt', ['p0', 'p1', 'p2']), + ('olchk', OM_OLCHK), + ('w1p', False), + ('w0p', True), + ('cc', OM_EXEC_CND), + ('end', OM_END), + ('atom', OM_ATOM), + ('rpt', OM_RPT) + ]), + enc_ops=[ + ('0', O_MSK_BBYP0S1, ['ft0', 'ft1', 'ft2'], [SRC(2), SRC(1), SRC(0)]), + ('1', O_LOGICAL, ['ft4'], ['ft1', 'ft2', 'ft1_invert', 'pco_zero'], [(OM_LOGIOP, 'or')]), + ('2', O_SHIFT, [DEST(0)], ['ft4', SRC(1), '_'], [(OM_SHIFTOP, 'shr')]) + ], + srcs=[ + ('s[0]', ('0', SRC(0)), 's0'), + ('s[1]', ('0', SRC(1)), 's1'), + ('s[2]', ('0', SRC(2)), 's2'), + ('s[3]', ('1', SRC(3)), 's3'), + ('s[4]', ('2', SRC(1)), 's4') + ], + dests=[('w[0]', ('2', DEST(0)), 'ft5')] +) + +group_map(O_BFI, + hdr=(I_IGRP_HDR_BITWISE, [ + ('opcnt', ['p0', 'p1']), + ('olchk', OM_OLCHK), + ('w1p', False), + ('w0p', True), + ('cc', OM_EXEC_CND), + ('end', OM_END), + ('atom', OM_ATOM), + ('rpt', OM_RPT) + ]), + enc_ops=[ + ('0', O_MSK_LSL, ['ft0', 'ft1', 'ft2'], [SRC(3), SRC(2), SRC(1)]), + ('1', O_LOGICAL, [DEST(0)], ['ft1', 'ft2', 'ft1_invert', SRC(0)], [(OM_LOGIOP, 'or')]), + ], + srcs=[ + ('s[0]', ('0', SRC(0)), 's0'), + ('s[1]', ('0', SRC(1)), 's1'), + ('s[2]', ('0', SRC(2)), 's2'), + ('s[3]', ('1', SRC(3)), 's3'), + ], + dests=[('w[0]', ('1', DEST(0)), 'ft4')] +) + group_map(O_WOP, hdr=(I_IGRP_HDR_CONTROL, [ ('olchk', False), diff --git a/src/imagination/pco/pco_nir.c b/src/imagination/pco/pco_nir.c index 71d7654d89c..cfee1cfee8e 100644 --- a/src/imagination/pco/pco_nir.c +++ b/src/imagination/pco/pco_nir.c @@ -38,12 +38,14 @@ static const nir_shader_compiler_options nir_options = { .lower_fdiv = true, .lower_ffract = true, + .lower_find_lsb = true, .lower_fquantize2f16 = true, .lower_flrp32 = true, .lower_fmod = true, .lower_fpow = true, .lower_fsqrt = true, .lower_ftrunc = true, + .lower_ifind_msb = true, .lower_ldexp = true, .lower_layer_fs_input_to_sysval = true, .compact_arrays = true, diff --git a/src/imagination/pco/pco_ops.py b/src/imagination/pco/pco_ops.py index c6318d9f39a..74d760a1baa 100644 --- a/src/imagination/pco/pco_ops.py +++ b/src/imagination/pco/pco_ops.py @@ -290,7 +290,10 @@ OM_SHIFTOP = op_mod_enum('shiftop', [ 'shr', 'rol', 'cps', - 'asr', + 'asr_twb', + 'asr_pwb', + 'asr_mtb', + 'asr_ftb', ]) OM_CND = op_mod_enum('cnd', [ @@ -363,15 +366,23 @@ O_ATOMIC = hw_op('atomic', [OM_OLCHK, OM_EXEC_CND, OM_END, OM_ATOM_OP], 1, 2) ## Bitwise. O_MOVI32 = hw_op('movi32', OM_ALU, 1, 1) +O_CBS = hw_op('cbs', OM_ALU, 1, 1) +O_FTB = hw_op('ftb', OM_ALU, 1, 1) +O_REV = hw_op('rev', OM_ALU, 1, 1) O_LOGICAL = hw_op('logical', OM_ALU + [OM_LOGIOP], 1, 4) O_SHIFT = hw_op('shift', OM_ALU + [OM_SHIFTOP], 1, 3) O_COPYSIGN = hw_op('copysign', OM_ALU, 1, 2) +O_IBFE = hw_op('ibfe', OM_ALU, 1, 3) +O_UBFE = hw_op('ubfe', OM_ALU, 1, 3) +O_BFI = hw_op('bfi', OM_ALU, 1, 4) + O_BBYP0BM = hw_direct_op('bbyp0bm', [], 2, 2) O_BBYP0BM_IMM32 = hw_direct_op('bbyp0bm_imm32', [], 2, 2) O_BBYP0S1 = hw_direct_op('bbyp0s1', [], 1, 1) O_MSK_BBYP0S1 = hw_direct_op('msk_bbyp0s1', [], 3, 3) +O_MSK_LSL = hw_direct_op('msk_lsl', [], 3, 3) ## Control. O_WOP = hw_op('wop') diff --git a/src/imagination/pco/pco_trans_nir.c b/src/imagination/pco/pco_trans_nir.c index 292192fa772..f70d0a15404 100644 --- a/src/imagination/pco/pco_trans_nir.c +++ b/src/imagination/pco/pco_trans_nir.c @@ -1127,7 +1127,7 @@ static pco_instr *trans_shift(trans_ctx *tctx, break; case nir_op_ishr: - shiftop = PCO_SHIFTOP_ASR; + shiftop = PCO_SHIFTOP_ASR_TWB; break; case nir_op_ushr: @@ -1454,6 +1454,39 @@ static pco_instr *trans_alu(trans_ctx *tctx, nir_alu_instr *alu) instr = trans_shift(tctx, alu->op, dest, src[0], src[1]); break; + case nir_op_bit_count: + instr = pco_cbs(&tctx->b, dest, src[0]); + break; + + case nir_op_ufind_msb: + instr = pco_ftb(&tctx->b, dest, src[0]); + break; + + case nir_op_ibitfield_extract: { + pco_ref bfe = pco_ref_new_ssa32(tctx->func); + pco_ibfe(&tctx->b, bfe, src[0], src[1], src[2]); + instr = pco_bcsel(&tctx->b, dest, src[2], bfe, pco_zero); + break; + } + + case nir_op_ubitfield_extract: { + pco_ref bfe = pco_ref_new_ssa32(tctx->func); + pco_ubfe(&tctx->b, bfe, src[0], src[1], src[2]); + instr = pco_bcsel(&tctx->b, dest, src[2], bfe, pco_zero); + break; + } + + case nir_op_bitfield_insert: { + pco_ref bfi = pco_ref_new_ssa32(tctx->func); + pco_bfi(&tctx->b, bfi, src[0], src[1], src[2], src[3]); + instr = pco_bcsel(&tctx->b, dest, src[3], bfi, src[0]); + break; + } + + case nir_op_bitfield_reverse: + instr = pco_rev(&tctx->b, dest, src[0]); + break; + case nir_op_f2i32: instr = pco_pck(&tctx->b, dest,