mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-01-03 09:20:13 +01:00
nir: add nir_op_fmulz and nir_op_ffmaz
Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Timur Kristóf <timur.kristof@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/13436>
This commit is contained in:
parent
945fb51fb5
commit
7f05ea3793
3 changed files with 33 additions and 0 deletions
|
|
@@ -3446,6 +3446,9 @@ typedef struct nir_shader_compiler_options {
|
|||
*/
|
||||
bool use_scoped_barrier;
|
||||
|
||||
/** Backend supports fmulz (and ffmaz if lower_ffma32=false) */
|
||||
bool has_fmulz;
|
||||
|
||||
/**
|
||||
* Is this the Intel vec4 backend?
|
||||
*
|
||||
|
|
|
|||
|
|
@@ -669,6 +669,20 @@ if (nir_is_rounding_mode_rtz(execution_mode, bit_size)) {
|
|||
dst = src0 * src1;
|
||||
}
|
||||
""")
|
||||
|
||||
# Unlike fmul, anything (even infinity or NaN) multiplied by zero is always zero.
|
||||
# fmulz(0.0, inf) and fmulz(0.0, nan) must be +/-0.0, even if
|
||||
# SIGNED_ZERO_INF_NAN_PRESERVE is not used. If SIGNED_ZERO_INF_NAN_PRESERVE is used, then
|
||||
# the result must be a positive zero if either operand is zero.
|
||||
binop("fmulz", tfloat32, _2src_commutative + associative, """
|
||||
if (src0 == 0.0 || src1 == 0.0)
|
||||
dst = 0.0;
|
||||
else if (nir_is_rounding_mode_rtz(execution_mode, 32))
|
||||
dst = _mesa_double_to_float_rtz((double)src0 * (double)src1);
|
||||
else
|
||||
dst = src0 * src1;
|
||||
""")
|
||||
|
||||
# low 32-bits of signed/unsigned integer multiply
|
||||
binop("imul", tint, _2src_commutative + associative, """
|
||||
/* Use 64-bit multiplies to prevent overflow of signed arithmetic */
|
||||
|
|
@@ -960,6 +974,19 @@ if (nir_is_rounding_mode_rtz(execution_mode, bit_size)) {
|
|||
}
|
||||
""")
|
||||
|
||||
# Unlike ffma, anything (even infinity or NaN) multiplied by zero is always zero.
|
||||
# ffmaz(0.0, inf, src2) and ffmaz(0.0, nan, src2) must be +/-0.0 + src2, even if
|
||||
# SIGNED_ZERO_INF_NAN_PRESERVE is not used. If SIGNED_ZERO_INF_NAN_PRESERVE is used, then
|
||||
# the result must be a positive zero plus src2 if either src0 or src1 is zero.
|
||||
triop("ffmaz", tfloat32, _2src_commutative, """
|
||||
if (src0 == 0.0 || src1 == 0.0)
|
||||
dst = 0.0 + src2;
|
||||
else if (nir_is_rounding_mode_rtz(execution_mode, 32))
|
||||
dst = _mesa_float_fma_rtz(src0, src1, src2);
|
||||
else
|
||||
dst = fmaf(src0, src1, src2);
|
||||
""")
|
||||
|
||||
triop("flrp", tfloat, "", "src0 * (1 - src2) + src1 * src2")
|
||||
|
||||
# Ternary addition
|
||||
|
|
|
|||
|
|
@@ -330,10 +330,12 @@ optimizations.extend([
|
|||
(('ffma@16', a, b, c), ('fadd', ('fmul', a, b), c), 'options->lower_ffma16'),
|
||||
(('ffma@32', a, b, c), ('fadd', ('fmul', a, b), c), 'options->lower_ffma32'),
|
||||
(('ffma@64', a, b, c), ('fadd', ('fmul', a, b), c), 'options->lower_ffma64'),
|
||||
(('ffmaz', a, b, c), ('fadd', ('fmulz', a, b), c), 'options->lower_ffma32'),
|
||||
# Always lower inexact ffma, because it will be fused back by late optimizations (nir_opt_algebraic_late).
|
||||
(('~ffma@16', a, b, c), ('fadd', ('fmul', a, b), c), 'options->fuse_ffma16'),
|
||||
(('~ffma@32', a, b, c), ('fadd', ('fmul', a, b), c), 'options->fuse_ffma32'),
|
||||
(('~ffma@64', a, b, c), ('fadd', ('fmul', a, b), c), 'options->fuse_ffma64'),
|
||||
(('~ffmaz', a, b, c), ('fadd', ('fmulz', a, b), c), 'options->fuse_ffma32'),
|
||||
|
||||
(('~fmul', ('fadd', ('iand', ('ineg', ('b2i', 'a@bool')), ('fmul', b, c)), '#d'), '#e'),
|
||||
('bcsel', a, ('fmul', ('fadd', ('fmul', b, c), d), e), ('fmul', d, e))),
|
||||
|
|
@@ -2483,6 +2485,7 @@ late_optimizations = [
|
|||
(('~fadd@16', ('fmul', a, b), c), ('ffma', a, b, c), 'options->fuse_ffma16'),
|
||||
(('~fadd@32', ('fmul', a, b), c), ('ffma', a, b, c), 'options->fuse_ffma32'),
|
||||
(('~fadd@64', ('fmul', a, b), c), ('ffma', a, b, c), 'options->fuse_ffma64'),
|
||||
(('~fadd@32', ('fmulz', a, b), c), ('ffmaz', a, b, c), 'options->fuse_ffma32'),
|
||||
|
||||
# Subtractions get lowered during optimization, so we need to recombine them
|
||||
(('fadd', a, ('fneg', 'b')), ('fsub', 'a', 'b'), 'options->has_fsub'),
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue