diff --git a/src/freedreno/ir3/ir3_compiler.c b/src/freedreno/ir3/ir3_compiler.c index 3f2e4d12c0e..caabf9efaae 100644 --- a/src/freedreno/ir3/ir3_compiler.c +++ b/src/freedreno/ir3/ir3_compiler.c @@ -144,6 +144,9 @@ static const nir_shader_compiler_options ir3_base_options = { .fuse_ffma16 = true, .fuse_ffma32 = true, .fuse_ffma64 = true, + .float_mul_add16 = nir_float_muladd_support_has_fmad | nir_float_muladd_support_fuse, + .float_mul_add32 = nir_float_muladd_support_has_fmad | nir_float_muladd_support_fuse, + .float_mul_add64 = nir_float_muladd_support_has_fmad | nir_float_muladd_support_fuse, .vertex_id_zero_based = false, .lower_extract_byte = true, .lower_extract_word = true, diff --git a/src/freedreno/ir3/ir3_compiler_nir.c b/src/freedreno/ir3/ir3_compiler_nir.c index aab752dd5b1..388c5a01a44 100644 --- a/src/freedreno/ir3/ir3_compiler_nir.c +++ b/src/freedreno/ir3/ir3_compiler_nir.c @@ -726,6 +726,7 @@ emit_alu(struct ir3_context *ctx, nir_alu_instr *alu) case nir_op_fsub: dst = ir3_ADD_F_rpt(b, dst_sz, src[0], 0, src[1], IR3_REG_FNEG); break; + case nir_op_fmad: case nir_op_ffma_old: /* The scalar ALU doesn't support mad, so expand to mul+add so that we * don't unnecessarily fall back to non-earlypreamble. This is safe diff --git a/src/freedreno/ir3/ir3_context.c b/src/freedreno/ir3/ir3_context.c index 08a82537940..3c05db88300 100644 --- a/src/freedreno/ir3/ir3_context.c +++ b/src/freedreno/ir3/ir3_context.c @@ -90,7 +90,6 @@ ir3_context_init(struct ir3_compiler *compiler, struct ir3_shader *shader, /* nir_opt_algebraic() above would have unfused our ffmas, re-fuse them. */ if (needs_late_alg) { NIR_PASS(progress, ctx->s, nir_opt_algebraic_late); - NIR_PASS(progress, ctx->s, ir3_nir_opt_algebraic_late); NIR_PASS(progress, ctx->s, nir_opt_dce); } diff --git a/src/freedreno/ir3/ir3_nir.c b/src/freedreno/ir3/ir3_nir.c index 988c133c422..b20361a7f4a 100644 --- a/src/freedreno/ir3/ir3_nir.c +++ b/src/freedreno/ir3/ir3_nir.c @@ -1647,8 +1647,7 @@ ir3_nir_lower_variant(struct ir3_shader_variant *so, */ bool more_late_algebraic = true; while (more_late_algebraic) { - more_late_algebraic = OPT(s, nir_opt_algebraic_late) || - OPT(s, ir3_nir_opt_algebraic_late); + more_late_algebraic = OPT(s, nir_opt_algebraic_late); if (!more_late_algebraic && so->compiler->gen >= 5) { /* Lowers texture operations that have only f2f16 or u2u16 called on * them to have a 16-bit destination. Also, lower 16-bit texture diff --git a/src/freedreno/ir3/ir3_nir.h b/src/freedreno/ir3/ir3_nir.h index 4797fc3b4c1..95817d5c73c 100644 --- a/src/freedreno/ir3/ir3_nir.h +++ b/src/freedreno/ir3/ir3_nir.h @@ -63,7 +63,6 @@ nir_mem_access_size_align ir3_mem_access_size_align( bool ir3_nir_opt_branch_and_or_not(nir_shader *nir); bool ir3_nir_opt_triops_bitwise(nir_shader *nir); -bool ir3_nir_opt_algebraic_late(nir_shader *nir); struct ir3_optimize_options { nir_opt_uub_options opt_uub_options; diff --git a/src/freedreno/ir3/ir3_nir_lower_load_barycentric_at_offset.c b/src/freedreno/ir3/ir3_nir_lower_load_barycentric_at_offset.c index 94a7a13ef73..375c0422830 100644 --- a/src/freedreno/ir3/ir3_nir_lower_load_barycentric_at_offset.c +++ b/src/freedreno/ir3/ir3_nir_lower_load_barycentric_at_offset.c @@ -33,8 +33,8 @@ ir3_nir_lower_load_barycentric_at_offset_instr(nir_builder *b, nir_instr *instr, * times the derivatives of ij in screen space. */ nir_def *new_ij = ij; - new_ij = nir_ffma_old(b, chan(off, 0), nir_ddx(b, ij), new_ij); - new_ij = nir_ffma_old(b, chan(off, 1), nir_ddy(b, ij), new_ij); + new_ij = nir_fmad(b, chan(off, 0), nir_ddx(b, ij), new_ij); + new_ij = nir_fmad(b, chan(off, 1), nir_ddy(b, ij), new_ij); return new_ij; } else { @@ -52,8 +52,8 @@ ir3_nir_lower_load_barycentric_at_offset_instr(nir_builder *b, nir_instr *instr, /* Get the offset value from pixel center for ij, and also for w. */ nir_def *pos = sij; - pos = nir_ffma_old(b, chan(off, 0), nir_ddx(b, sij), pos); - pos = nir_ffma_old(b, chan(off, 1), nir_ddy(b, sij), pos); + pos = nir_fmad(b, chan(off, 0), nir_ddx(b, sij), pos); + pos = nir_fmad(b, chan(off, 1), nir_ddy(b, sij), pos); /* convert back into screen space, dividing by the offset 1/w */ return nir_fmul(b, nir_trim_vector(b, pos, 2), diff --git a/src/freedreno/ir3/ir3_nir_opt_algebraic_late.py b/src/freedreno/ir3/ir3_nir_opt_algebraic_late.py deleted file mode 100644 index 09821073522..00000000000 --- a/src/freedreno/ir3/ir3_nir_opt_algebraic_late.py +++ /dev/null @@ -1,57 +0,0 @@ -# -# Copyright © 2016 Intel Corporation -# -# SPDX-License-Identifier: MIT - -import argparse -import sys - -# fuse fadd+fmul late to get something we can turn into mad.f32/f16. The -# common nir_opt_algebraic_late pass only does this for non-exact patterns. -# Since for us, mad is not fused, we don't have this restriction. -late_optimizations = [] - -a = 'a' -b = 'b' -c = 'c' - -for sz in [16, 32]: - # Fuse the correct fmul. Only consider fmuls where the only users are fadd - # (or fneg/fabs which are assumed to be propagated away), as a heuristic to - # avoid fusing in cases where it's harmful. - fmul = 'fmul(is_only_used_by_fadd)' - ffma = 'ffma_old' - - fadd = 'fadd@{}'.format(sz) - - late_optimizations.extend([ - ((fadd, (fmul, a, b), c), (ffma, a, b, c)), - - ((fadd, ('fneg(is_only_used_by_fadd)', (fmul, a, b)), c), - (ffma, ('fneg', a), b, c)), - - ((fadd, ('fabs(is_only_used_by_fadd)', (fmul, a, b)), c), - (ffma, ('fabs', a), ('fabs', b), c)), - - ((fadd, ('fneg(is_only_used_by_fadd)', ('fabs', (fmul, a, b))), c), - (ffma, ('fneg', ('fabs', a)), ('fabs', b), c)), - ]) - -def main(): - parser = argparse.ArgumentParser() - parser.add_argument('-p', '--import-path', required=True) - args = parser.parse_args() - sys.path.insert(0, args.import_path) - run() - - -def run(): - import nir_algebraic # pylint: disable=import-error - - print('#include "ir3_nir.h"') - print(nir_algebraic.AlgebraicPass("ir3_nir_opt_algebraic_late", - late_optimizations).render()) - - -if __name__ == '__main__': - main() diff --git a/src/freedreno/ir3/ir3_nir_trig.py b/src/freedreno/ir3/ir3_nir_trig.py index 27efcdd41a9..00c3bb2278a 100644 --- a/src/freedreno/ir3/ir3_nir_trig.py +++ b/src/freedreno/ir3/ir3_nir_trig.py @@ -7,8 +7,9 @@ import argparse import sys trig_workarounds = [ - (('fsin', 'x@32'), ('fsin', ('!ffma_old', 6.2831853, ('ffract', ('!ffma_old', 0.15915494, 'x', 0.5)), -3.14159265))), - (('fcos', 'x@32'), ('fcos', ('!ffma_old', 6.2831853, ('ffract', ('!ffma_old', 0.15915494, 'x', 0.5)), -3.14159265))), + # !fmad as reassociation could make this not precise enough. + (('fsin', 'x@32'), ('fsin', ('!fmad', 6.2831853, ('ffract', ('!fmad', 0.15915494, 'x', 0.5)), -3.14159265))), + (('fcos', 'x@32'), ('fcos', ('!fmad', 6.2831853, ('ffract', ('!fmad', 0.15915494, 'x', 0.5)), -3.14159265))), ] diff --git a/src/freedreno/ir3/meson.build b/src/freedreno/ir3/meson.build index 6ff1f43d9c6..5241d008d0b 100644 --- a/src/freedreno/ir3/meson.build +++ b/src/freedreno/ir3/meson.build @@ -45,17 +45,6 @@ ir3_nir_triop_bitwise_c = custom_target( depend_files : nir_algebraic_depends, ) -ir3_nir_opt_algebraic_late_c = custom_target( - 'ir3_nir_opt_algebraic_late.c', - input : 'ir3_nir_opt_algebraic_late.py', - output : 'ir3_nir_opt_algebraic_late.c', - command : [ - prog_python, '@INPUT@', '-p', dir_compiler_nir, - ], - capture : true, - depend_files : nir_algebraic_depends, -) - ir3_parser = custom_target( 'ir3_parser.[ch]', input: 'ir3_parser.y', @@ -150,7 +139,6 @@ libfreedreno_ir3 = static_library( ir3_nir_imul_c, ir3_nir_branch_and_or_not_c, ir3_nir_triop_bitwise_c, - ir3_nir_opt_algebraic_late_c, ir3_parser[0], ir3_parser[1], ir3_lexer, ],