nir: separate ffmaz from has_fmulz

There is no hardware which supports ffmaz with denorms.
We also need this to be separate because there is AMD hardware
with ffma but not ffmaz.

Reviewed-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/41649>
This commit is contained in:
Georg Lehmann 2026-05-18 14:37:37 +02:00 committed by Marge Bot
parent 19e56904f2
commit a92d0356eb
5 changed files with 21 additions and 6 deletions

View file

@ -99,6 +99,7 @@ void ac_nir_set_options(const struct ac_compiler_info *info, bool use_llvm,
options->has_pack_half_2x16_rtz = true;
options->has_bit_test = !use_llvm;
options->has_fmulz = true;
options->has_ffmaz_no_denorms = info->gfx_level >= GFX10_3;
options->has_msad = true;
options->has_shfr32 = true;
options->has_mul24_relaxed = true;

View file

@ -43,6 +43,9 @@ has_fmulz = '(options->has_fmulz || \
(options->has_fmulz_no_denorms && \
!nir_is_denorm_preserve(info->float_controls_execution_mode, 32)))'
has_ffmaz = '(options->has_ffmaz_no_denorms && \
!nir_is_denorm_preserve(info->float_controls_execution_mode, 32))'
denorm_ftz_16 = 'nir_is_denorm_flush_to_zero(info->float_controls_execution_mode, 16)'
denorm_ftz_32 = 'nir_is_denorm_flush_to_zero(info->float_controls_execution_mode, 32)'
denorm_ftz_64 = 'nir_is_denorm_flush_to_zero(info->float_controls_execution_mode, 64)'
@ -388,13 +391,13 @@ optimizations += [
# ffma(b==0.0 ? 0.0 : a, a==0.0 ? 0.0 : b, c) -> ffmaz(a, b, c)
*add_fabs_fneg((('ffma@32(nsz)', ('bcsel', ('feq', b, 0.0), 0.0, 'ma'), ('bcsel', ('feq', a, 0.0), 0.0, 'mb'), c),
('ffmaz', 'ma', 'mb', c), has_fmulz), {'ma' : a, 'mb' : b}),
('ffmaz', 'ma', 'mb', c), has_ffmaz), {'ma' : a, 'mb' : b}),
*add_fabs_fneg((('ffma@32(nsz)', 'ma', ('bcsel', ('feq', a, 0.0), 0.0, '#b(is_not_const_zero)'), c),
('ffmaz', 'ma', b, c), has_fmulz), {'ma' : a}),
('ffmaz', 'ma', b, c), has_ffmaz), {'ma' : a}),
*add_fabs_fneg((('ffma@32(nsz)', ('b2f', ('iand', ('fneu', a, 0.0), b)), ('bcsel', b, 'ma', 0.0), c),
('ffmaz', 'ma', ('b2f', b), c), has_fmulz), {'ma' : a}),
('ffmaz', 'ma', ('b2f', b), c), has_ffmaz), {'ma' : a}),
*add_fabs_fneg((('ffma@32(nsz)', ('b2f', ('inot', ('ior', ('feq', a, 0.0), b))), ('bcsel', b, 0.0, 'ma'), c),
('ffmaz', 'ma', ('b2f', ('inot', b)), c), has_fmulz), {'ma' : a}),
('ffmaz', 'ma', ('b2f', ('inot', b)), c), has_ffmaz), {'ma' : a}),
# b == 0.0 ? 1.0 : fexp2(fmul(a, b)) -> fexp2(fmulz(a, b))
*add_fabs_fneg((('bcsel(nsz,nnan,ninf)', ('feq', b, 0.0), 1.0, ('fexp2', ('fmul@32', a, 'mb'))),
@ -3725,6 +3728,9 @@ for sz, mulz in itertools.product([16, 32, 64], [False, True]):
option_fmad = f'{option_fuse} && (!{option_has_ffma} || {option_prefer_split}) && {option_has_fmad}'
option_ffma = f'{option_fuse} && (!{option_has_fmad} || !{option_prefer_split}) && {option_has_ffma}'
if mulz:
option_ffma += f' && {has_ffmaz}'
for fmad in ['ffma', 'fmad']:
option = option_fmad if fmad == 'fmad' else option_ffma
# contract is only needed for ffma

View file

@ -673,15 +673,21 @@ typedef struct nir_shader_compiler_options {
/** Backend supports bfdot2_bfadd opcode. */
bool has_bfdot2_bfadd;
/** Backend supports fmulz (and ffmaz if lower_ffma32=false) */
/** Backend supports fmulz (and fmadz if has_fmad) */
bool has_fmulz;
/**
* Backend supports fmulz (and ffmaz if lower_ffma32=false) but only if
* Backend supports fmulz (and fmadz if has_fmad) but only if
* FLOAT_CONTROLS_DENORM_PRESERVE_FP32 is not set
*/
bool has_fmulz_no_denorms;
/**
* Backend supports ffmaz but only if
* FLOAT_CONTROLS_DENORM_PRESERVE_FP32 is not set
*/
bool has_ffmaz_no_denorms;
/** Backend supports fcanonicalize, if not set fcanonicalize will be lowered
* to fmul(a, 1.0)
*/

View file

@ -3628,6 +3628,7 @@ nvir_nir_shader_compiler_options(int chipset, uint8_t shader_type)
op.has_rotate32 = (chipset >= NVISA_GV100_CHIPSET);
op.has_imul24 = false;
op.has_fmulz = (chipset > NVISA_G80_CHIPSET);
op.has_ffmaz_no_denorms = (chipset >= NVISA_GF100_CHIPSET);
op.intel_vec4 = false;
op.lower_uniforms_to_ubo = true;
op.force_indirect_unrolling = (nir_variable_mode) (

View file

@ -179,6 +179,7 @@ fn nir_options(dev: &nv_device_info) -> nir_shader_compiler_options {
// We set .ftz on f32 by default so we can support fmulz whenever the client
// doesn't explicitly request denorms.
has_fmulz_no_denorms: true,
has_ffmaz_no_denorms: true,
has_find_msb_rev: true,
has_pack_half_2x16_rtz: true,
has_bfm: dev.sm >= 70,