pco, pygen: support gradient/derivative ops

Signed-off-by: Simon Perretta <simon.perretta@imgtec.com>
Acked-by: Erik Faye-Lund <erik.faye-lund@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/36412>
This commit is contained in:
Simon Perretta 2024-12-29 10:05:28 +00:00 committed by Marge Bot
parent 3b20a3261f
commit 9047c30e50
5 changed files with 170 additions and 0 deletions

View file

@ -835,6 +835,78 @@ encode_map(O_MBYP,
]
)
# fdsx: encoded as a 'sngl' instruction with sngl_op = 'dsx'.
encode_map(O_FDSX,
   encodings=[
      # Extended form: the s0neg/s0abs fields are driven by the RM_NEG and
      # RM_ABS modifiers of source 0.
      (I_SNGL_EXT, [
         ('sngl_op', 'dsx'),
         ('s0neg', (RM_NEG, SRC(0))),
         ('s0abs', (RM_ABS, SRC(0)))
      ]),
      # Base form: has no modifier fields, and is listed with the conditions
      # that neither RM_NEG nor RM_ABS is set on source 0.
      (
         I_SNGL,
         [('sngl_op', 'dsx')],
         [
            (RM_NEG, SRC(0), '== false'),
            (RM_ABS, SRC(0), '== false')
         ]
      )
   ],
   # NOTE(review): presumably maps the op slot to its output/input
   # references in the instruction group - confirm against pygen.
   op_ref_maps=[
      ('0', ['ft0'], ['s0']),
      ('1', ['ft1'], ['s3'])
   ]
)
# fdsxf: encoded as a 'sngl' instruction with sngl_op = 'dsxf'.
#
# Both encodings below must select the same sngl_op; if the base form used
# 'dsx', an fdsxf without source modifiers would be emitted as the coarse
# derivative op instead of the fine one.
encode_map(O_FDSXF,
   encodings=[
      # Extended form: the s0neg/s0abs fields are driven by the RM_NEG and
      # RM_ABS modifiers of source 0.
      (I_SNGL_EXT, [
         ('sngl_op', 'dsxf'),
         ('s0neg', (RM_NEG, SRC(0))),
         ('s0abs', (RM_ABS, SRC(0)))
      ]),
      # Base form: has no modifier fields, and is listed with the conditions
      # that neither RM_NEG nor RM_ABS is set on source 0.
      (I_SNGL, [('sngl_op', 'dsxf')], [
         (RM_NEG, SRC(0), '== false'),
         (RM_ABS, SRC(0), '== false')
      ])
   ],
   # NOTE(review): presumably maps the op slot to its output/input
   # references in the instruction group - confirm against pygen.
   op_ref_maps=[
      ('0', ['ft0'], ['s0']),
      ('1', ['ft1'], ['s3'])
   ]
)
# fdsy: encoded as a 'sngl' instruction with sngl_op = 'dsy'.
encode_map(O_FDSY,
   encodings=[
      # Extended form: the s0neg/s0abs fields are driven by the RM_NEG and
      # RM_ABS modifiers of source 0.
      (I_SNGL_EXT, [
         ('sngl_op', 'dsy'),
         ('s0neg', (RM_NEG, SRC(0))),
         ('s0abs', (RM_ABS, SRC(0)))
      ]),
      # Base form: has no modifier fields, and is listed with the conditions
      # that neither RM_NEG nor RM_ABS is set on source 0.
      (
         I_SNGL,
         [('sngl_op', 'dsy')],
         [
            (RM_NEG, SRC(0), '== false'),
            (RM_ABS, SRC(0), '== false')
         ]
      )
   ],
   # NOTE(review): presumably maps the op slot to its output/input
   # references in the instruction group - confirm against pygen.
   op_ref_maps=[
      ('0', ['ft0'], ['s0']),
      ('1', ['ft1'], ['s3'])
   ]
)
# fdsyf: encoded as a 'sngl' instruction with sngl_op = 'dsyf'.
#
# Both encodings below must select the same sngl_op; if the base form used
# 'dsy', an fdsyf without source modifiers would be emitted as the coarse
# derivative op instead of the fine one.
encode_map(O_FDSYF,
   encodings=[
      # Extended form: the s0neg/s0abs fields are driven by the RM_NEG and
      # RM_ABS modifiers of source 0.
      (I_SNGL_EXT, [
         ('sngl_op', 'dsyf'),
         ('s0neg', (RM_NEG, SRC(0))),
         ('s0abs', (RM_ABS, SRC(0)))
      ]),
      # Base form: has no modifier fields, and is listed with the conditions
      # that neither RM_NEG nor RM_ABS is set on source 0.
      (I_SNGL, [('sngl_op', 'dsyf')], [
         (RM_NEG, SRC(0), '== false'),
         (RM_ABS, SRC(0), '== false')
      ])
   ],
   # NOTE(review): presumably maps the op slot to its output/input
   # references in the instruction group - confirm against pygen.
   op_ref_maps=[
      ('0', ['ft0'], ['s0']),
      ('1', ['ft1'], ['s3'])
   ]
)
encode_map(O_PCK,
encodings=[
(I_PCK, [
@ -1276,6 +1348,74 @@ group_map(O_MBYP,
dests=[('w[0]', ('0', DEST(0)), 'ft0')]
)
# Instruction-group mapping for fdsx: a main-type group header with oporg
# 'p0', and a single encoded op in slot '0'.
group_map(O_FDSX,
   hdr=(
      I_IGRP_HDR_MAIN,
      [
         ('oporg', 'p0'),
         ('olchk', OM_OLCHK),
         # Only the w0p flag is set; w1p stays clear.
         ('w1p', False),
         ('w0p', True),
         # The remaining header fields are taken from the op's modifiers.
         ('cc', OM_EXEC_CND),
         ('end', OM_END),
         ('atom', OM_ATOM),
         ('rpt', OM_RPT)
      ]
   ),
   enc_ops=[('0', O_FDSX)],
   # Source 0 of op '0' goes to s[0], referenced as 's0'.
   srcs=[
      ('s[0]', ('0', SRC(0)), 's0')
   ],
   iss=[
      ('is[4]', 'ft0')
   ],
   # Dest 0 of op '0' is written through w[0], referenced as 'ft0'.
   dests=[
      ('w[0]', ('0', DEST(0)), 'ft0')
   ]
)
# Instruction-group mapping for fdsxf: a main-type group header with oporg
# 'p0', and a single encoded op in slot '0'.
group_map(O_FDSXF,
   hdr=(
      I_IGRP_HDR_MAIN,
      [
         ('oporg', 'p0'),
         ('olchk', OM_OLCHK),
         # Only the w0p flag is set; w1p stays clear.
         ('w1p', False),
         ('w0p', True),
         # The remaining header fields are taken from the op's modifiers.
         ('cc', OM_EXEC_CND),
         ('end', OM_END),
         ('atom', OM_ATOM),
         ('rpt', OM_RPT)
      ]
   ),
   enc_ops=[('0', O_FDSXF)],
   # Source 0 of op '0' goes to s[0], referenced as 's0'.
   srcs=[
      ('s[0]', ('0', SRC(0)), 's0')
   ],
   iss=[
      ('is[4]', 'ft0')
   ],
   # Dest 0 of op '0' is written through w[0], referenced as 'ft0'.
   dests=[
      ('w[0]', ('0', DEST(0)), 'ft0')
   ]
)
# Instruction-group mapping for fdsy: a main-type group header with oporg
# 'p0', and a single encoded op in slot '0'.
group_map(O_FDSY,
   hdr=(
      I_IGRP_HDR_MAIN,
      [
         ('oporg', 'p0'),
         ('olchk', OM_OLCHK),
         # Only the w0p flag is set; w1p stays clear.
         ('w1p', False),
         ('w0p', True),
         # The remaining header fields are taken from the op's modifiers.
         ('cc', OM_EXEC_CND),
         ('end', OM_END),
         ('atom', OM_ATOM),
         ('rpt', OM_RPT)
      ]
   ),
   enc_ops=[('0', O_FDSY)],
   # Source 0 of op '0' goes to s[0], referenced as 's0'.
   srcs=[
      ('s[0]', ('0', SRC(0)), 's0')
   ],
   iss=[
      ('is[4]', 'ft0')
   ],
   # Dest 0 of op '0' is written through w[0], referenced as 'ft0'.
   dests=[
      ('w[0]', ('0', DEST(0)), 'ft0')
   ]
)
# Instruction-group mapping for fdsyf: a main-type group header with oporg
# 'p0', and a single encoded op in slot '0'.
group_map(O_FDSYF,
   hdr=(
      I_IGRP_HDR_MAIN,
      [
         ('oporg', 'p0'),
         ('olchk', OM_OLCHK),
         # Only the w0p flag is set; w1p stays clear.
         ('w1p', False),
         ('w0p', True),
         # The remaining header fields are taken from the op's modifiers.
         ('cc', OM_EXEC_CND),
         ('end', OM_END),
         ('atom', OM_ATOM),
         ('rpt', OM_RPT)
      ]
   ),
   enc_ops=[('0', O_FDSYF)],
   # Source 0 of op '0' goes to s[0], referenced as 's0'.
   srcs=[
      ('s[0]', ('0', SRC(0)), 's0')
   ],
   iss=[
      ('is[4]', 'ft0')
   ],
   # Dest 0 of op '0' is written through w[0], referenced as 'ft0'.
   dests=[
      ('w[0]', ('0', DEST(0)), 'ft0')
   ]
)
group_map(O_PCK,
hdr=(I_IGRP_HDR_MAIN, [
('oporg', 'p2'),

View file

@ -39,6 +39,7 @@ static const nir_shader_compiler_options nir_options = {
.lower_fquantize2f16 = true,
.lower_layer_fs_input_to_sysval = true,
.compact_arrays = true,
.scalarize_ddx = true,
};
/**

View file

@ -304,6 +304,10 @@ O_FMUL = hw_op('fmul', OM_ALU + [OM_SAT], 1, 2, [], [[RM_ABS, RM_NEG, RM_FLR], [
O_FMAD = hw_op('fmad', OM_ALU + [OM_SAT, OM_LP], 1, 3, [], [[RM_ABS, RM_NEG], [RM_ABS, RM_NEG], [RM_ABS, RM_NEG, RM_FLR]])
O_FRCP = hw_op('frcp', OM_ALU, 1, 1, [], [[RM_ABS, RM_NEG]])
O_MBYP = hw_op('mbyp', OM_ALU, 1, 1, [], [[RM_ABS, RM_NEG]])
# Derivative ops: fdsx/fdsy and their 'f' variants fdsxf/fdsyf. All four are
# declared with the same arguments as frcp and mbyp: OM_ALU op modifiers and
# RM_ABS/RM_NEG on their one modifier list.
# NOTE(review): the "1, 1" arguments presumably are the dest and source
# counts - confirm against hw_op().
O_FDSX = hw_op('fdsx', OM_ALU, 1, 1, [], [[RM_ABS, RM_NEG]])
O_FDSXF = hw_op('fdsxf', OM_ALU, 1, 1, [], [[RM_ABS, RM_NEG]])
O_FDSY = hw_op('fdsy', OM_ALU, 1, 1, [], [[RM_ABS, RM_NEG]])
O_FDSYF = hw_op('fdsyf', OM_ALU, 1, 1, [], [[RM_ABS, RM_NEG]])
O_PCK = hw_op('pck', OM_ALU + [OM_PCK_FMT, OM_ROUNDZERO, OM_SCALE], 1, 1)
O_ADD64_32 = hw_op('add64_32', OM_ALU + [OM_S], 2, 4, [], [[RM_ABS, RM_NEG], [], [RM_ABS, RM_NEG]])
O_IMADD64 = hw_op('imadd64', OM_ALU + [OM_S], 2, 5, [], [[RM_ABS, RM_NEG], [RM_ABS, RM_NEG], [RM_ABS, RM_NEG]])

View file

@ -535,6 +535,15 @@ static inline bool instr_has_side_effects(pco_instr *instr)
if (pco_instr_has_atom(instr) && pco_instr_get_atom(instr))
return true;
/* Derivative ops are reported as having side effects. All four variants
 * (coarse fdsx/fdsy and fine fdsxf/fdsyf) are listed so that they are
 * treated uniformly.
 * NOTE(review): presumably this is because their result depends on
 * neighbouring invocations - confirm.
 */
switch (instr->op) {
case PCO_OP_FDSX:
case PCO_OP_FDSXF:
case PCO_OP_FDSY:
case PCO_OP_FDSYF:
   return true;

default:
   break;
}
/* TODO:
* - gradient
* - conditional

View file

@ -557,6 +557,22 @@ static pco_instr *trans_intr(trans_ctx *tctx, nir_intrinsic_instr *intr)
instr = trans_load_sysval_vs(tctx, intr, dest);
break;
/* X derivative: the plain and coarse intrinsics both map to fdsx. */
case nir_intrinsic_ddx:
case nir_intrinsic_ddx_coarse:
   instr = pco_fdsx(&tctx->b, dest, src[0]);
   break;

/* Only the explicitly fine intrinsic selects fdsxf. */
case nir_intrinsic_ddx_fine:
   instr = pco_fdsxf(&tctx->b, dest, src[0]);
   break;

/* Y derivative: the plain and coarse intrinsics both map to fdsy. */
case nir_intrinsic_ddy:
case nir_intrinsic_ddy_coarse:
   instr = pco_fdsy(&tctx->b, dest, src[0]);
   break;

/* Only the explicitly fine intrinsic selects fdsyf. */
case nir_intrinsic_ddy_fine:
   instr = pco_fdsyf(&tctx->b, dest, src[0]);
   break;
default:
printf("Unsupported intrinsic: \"");
nir_print_instr(&intr->instr, stdout);