diff --git a/src/amd/common/nir/ac_nir.c b/src/amd/common/nir/ac_nir.c index d4640969617..610fa31aaff 100644 --- a/src/amd/common/nir/ac_nir.c +++ b/src/amd/common/nir/ac_nir.c @@ -99,6 +99,7 @@ void ac_nir_set_options(const struct ac_compiler_info *info, bool use_llvm, options->has_pack_half_2x16_rtz = true; options->has_bit_test = !use_llvm; options->has_fmulz = true; + options->has_ffmaz_no_denorms = info->gfx_level >= GFX10_3; options->has_msad = true; options->has_shfr32 = true; options->has_mul24_relaxed = true; diff --git a/src/compiler/nir/nir_opt_algebraic.py b/src/compiler/nir/nir_opt_algebraic.py index cd7cf7b4164..1d4abd0fe42 100644 --- a/src/compiler/nir/nir_opt_algebraic.py +++ b/src/compiler/nir/nir_opt_algebraic.py @@ -43,6 +43,9 @@ has_fmulz = '(options->has_fmulz || \ (options->has_fmulz_no_denorms && \ !nir_is_denorm_preserve(info->float_controls_execution_mode, 32)))' +has_ffmaz = '(options->has_ffmaz_no_denorms && \ + !nir_is_denorm_preserve(info->float_controls_execution_mode, 32))' + denorm_ftz_16 = 'nir_is_denorm_flush_to_zero(info->float_controls_execution_mode, 16)' denorm_ftz_32 = 'nir_is_denorm_flush_to_zero(info->float_controls_execution_mode, 32)' denorm_ftz_64 = 'nir_is_denorm_flush_to_zero(info->float_controls_execution_mode, 64)' @@ -388,13 +391,13 @@ optimizations += [ # ffma(b==0.0 ? 0.0 : a, a==0.0 ? 0.0 : b, c) -> ffmaz(a, b, c) *add_fabs_fneg((('ffma@32(nsz)', ('bcsel', ('feq', b, 0.0), 0.0, 'ma'), ('bcsel', ('feq', a, 0.0), 0.0, 'mb'), c), - ('ffmaz', 'ma', 'mb', c), has_fmulz), {'ma' : a, 'mb' : b}), + ('ffmaz', 'ma', 'mb', c), has_ffmaz), {'ma' : a, 'mb' : b}), *add_fabs_fneg((('ffma@32(nsz)', 'ma', ('bcsel', ('feq', a, 0.0), 0.0, '#b(is_not_const_zero)'), c), - ('ffmaz', 'ma', b, c), has_fmulz), {'ma' : a}), + ('ffmaz', 'ma', b, c), has_ffmaz), {'ma' : a}), *add_fabs_fneg((('ffma@32(nsz)', ('b2f', ('iand', ('fneu', a, 0.0), b)), ('bcsel', b, 'ma', 0.0), c), - ('ffmaz', 'ma', ('b2f', b), c), has_fmulz), {'ma' : a}), + ('ffmaz', 'ma', ('b2f', b), c), has_ffmaz), {'ma' : a}), *add_fabs_fneg((('ffma@32(nsz)', ('b2f', ('inot', ('ior', ('feq', a, 0.0), b))), ('bcsel', b, 0.0, 'ma'), c), - ('ffmaz', 'ma', ('b2f', ('inot', b)), c), has_fmulz), {'ma' : a}), + ('ffmaz', 'ma', ('b2f', ('inot', b)), c), has_ffmaz), {'ma' : a}), # b == 0.0 ? 1.0 : fexp2(fmul(a, b)) -> fexp2(fmulz(a, b)) *add_fabs_fneg((('bcsel(nsz,nnan,ninf)', ('feq', b, 0.0), 1.0, ('fexp2', ('fmul@32', a, 'mb'))), @@ -3725,6 +3728,9 @@ for sz, mulz in itertools.product([16, 32, 64], [False, True]): option_fmad = f'{option_fuse} && (!{option_has_ffma} || {option_prefer_split}) && {option_has_fmad}' option_ffma = f'{option_fuse} && (!{option_has_fmad} || !{option_prefer_split}) && {option_has_ffma}' + if mulz: + option_ffma += f' && {has_ffmaz}' + for fmad in ['ffma', 'fmad']: option = option_fmad if fmad == 'fmad' else option_ffma # contract is only needed for ffma diff --git a/src/compiler/nir/nir_shader_compiler_options.h b/src/compiler/nir/nir_shader_compiler_options.h index fb4131a1bf4..46f30536cd6 100644 --- a/src/compiler/nir/nir_shader_compiler_options.h +++ b/src/compiler/nir/nir_shader_compiler_options.h @@ -673,15 +673,21 @@ typedef struct nir_shader_compiler_options { /** Backend supports bfdot2_bfadd opcode. */ bool has_bfdot2_bfadd; - /** Backend supports fmulz (and ffmaz if lower_ffma32=false) */ + /** Backend supports fmulz (and fmadz if has_fmad) */ bool has_fmulz; /** - * Backend supports fmulz (and ffmaz if lower_ffma32=false) but only if + * Backend supports fmulz (and fmadz if has_fmad) but only if * FLOAT_CONTROLS_DENORM_PRESERVE_FP32 is not set */ bool has_fmulz_no_denorms; + /** + * Backend supports ffmaz but only if + * FLOAT_CONTROLS_DENORM_PRESERVE_FP32 is not set + */ + bool has_ffmaz_no_denorms; + /** Backend supports fcanonicalize, if not set fcanonicalize will be lowered * to fmul(a, 1.0) */ diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp index 4889eeca771..d987d134925 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp @@ -3628,6 +3628,7 @@ nvir_nir_shader_compiler_options(int chipset, uint8_t shader_type) op.has_rotate32 = (chipset >= NVISA_GV100_CHIPSET); op.has_imul24 = false; op.has_fmulz = (chipset > NVISA_G80_CHIPSET); + op.has_ffmaz_no_denorms = (chipset >= NVISA_GF100_CHIPSET); op.intel_vec4 = false; op.lower_uniforms_to_ubo = true; op.force_indirect_unrolling = (nir_variable_mode) ( diff --git a/src/nouveau/compiler/nak/api.rs b/src/nouveau/compiler/nak/api.rs index 0bee0aeacfd..3866a22084b 100644 --- a/src/nouveau/compiler/nak/api.rs +++ b/src/nouveau/compiler/nak/api.rs @@ -179,6 +179,7 @@ fn nir_options(dev: &nv_device_info) -> nir_shader_compiler_options { // We set .ftz on f32 by default so we can support fmulz whenever the client // doesn't explicitly request denorms. has_fmulz_no_denorms: true, + has_ffmaz_no_denorms: true, has_find_msb_rev: true, has_pack_half_2x16_rtz: true, has_bfm: dev.sm >= 70,