diff --git a/src/compiler/nir/nir_opt_algebraic.py b/src/compiler/nir/nir_opt_algebraic.py index 953b67f7e2c..026f0ee5558 100644 --- a/src/compiler/nir/nir_opt_algebraic.py +++ b/src/compiler/nir/nir_opt_algebraic.py @@ -2638,15 +2638,39 @@ late_optimizations = [ # nir_lower_to_source_mods will collapse this, but its existence during the # optimization loop can prevent other optimizations. - (('fneg', ('fneg', a)), a), + (('fneg', ('fneg', a)), a) +] - # re-combine inexact mul+add to ffma. Do this before fsub so that a * b - c - # gets combined to fma(a, b, -c). - (('~fadd@16', ('fmul(is_only_used_by_fadd)', a, b), c), ('ffma', a, b, c), 'options->fuse_ffma16'), - (('~fadd@32', ('fmul(is_only_used_by_fadd)', a, b), c), ('ffma', a, b, c), 'options->fuse_ffma32'), - (('~fadd@64', ('fmul(is_only_used_by_fadd)', a, b), c), ('ffma', a, b, c), 'options->fuse_ffma64'), - (('~fadd@32', ('fmulz(is_only_used_by_fadd)', a, b), c), ('ffmaz', a, b, c), 'options->fuse_ffma32'), +# re-combine inexact mul+add to ffma. Do this before fsub so that a * b - c +# gets combined to fma(a, b, -c). +for sz, mulz in itertools.product([16, 32, 64], [False, True]): + # fmulz/ffmaz only for fp32 + if mulz and sz != 32: + continue + # Fuse the correct fmul. Only consider fmuls where the only users are fadd + # (or fneg/fabs which are assumed to be propagated away), as a heuristic to + # avoid fusing in cases where it's harmful. + fmul = ('fmulz' if mulz else 'fmul') + '(is_only_used_by_fadd)' + ffma = 'ffmaz' if mulz else 'ffma' + + fadd = '~fadd@{}'.format(sz) + option = 'options->fuse_ffma{}'.format(sz) + + late_optimizations.extend([ + ((fadd, (fmul, a, b), c), (ffma, a, b, c), option), + + ((fadd, ('fneg(is_only_used_by_fadd)', (fmul, a, b)), c), + (ffma, ('fneg', a), b, c), option), + + ((fadd, ('fabs(is_only_used_by_fadd)', (fmul, a, b)), c), + (ffma, ('fabs', a), ('fabs', b), c), option), + + ((fadd, ('fneg(is_only_used_by_fadd)', ('fabs', (fmul, a, b))), c), + (ffma, ('fneg', ('fabs', a)), ('fabs', b), c), option), + ]) + +late_optimizations.extend([ # Subtractions get lowered during optimization, so we need to recombine them (('fadd@8', a, ('fneg', 'b')), ('fsub', 'a', 'b'), 'options->has_fsub'), (('fadd@16', a, ('fneg', 'b')), ('fsub', 'a', 'b'), 'options->has_fsub'), @@ -2823,7 +2847,7 @@ late_optimizations = [ (('extract_i8', ('extract_u8', a, b), 0), ('extract_i8', a, b)), (('extract_u8', ('extract_i8', a, b), 0), ('extract_u8', a, b)), (('extract_u8', ('extract_u8', a, b), 0), ('extract_u8', a, b)), -] +]) # A few more extract cases we'd rather leave late for N in [16, 32]: diff --git a/src/compiler/nir/nir_search_helpers.h b/src/compiler/nir/nir_search_helpers.h index da1e9b506a8..5308fc41e40 100644 --- a/src/compiler/nir/nir_search_helpers.h +++ b/src/compiler/nir/nir_search_helpers.h @@ -433,8 +433,12 @@ is_only_used_by_fadd(const nir_alu_instr *instr) const nir_alu_instr *const user_alu = nir_instr_as_alu(user_instr); assert(instr != user_alu); - if (user_alu->op != nir_op_fadd) + if (user_alu->op == nir_op_fneg || user_alu->op == nir_op_fabs) { + if (!is_only_used_by_fadd(user_alu)) + return false; + } else if (user_alu->op != nir_op_fadd) { return false; + } } return true; diff --git a/src/gallium/drivers/virgl/ci/traces-virgl-iris.yml b/src/gallium/drivers/virgl/ci/traces-virgl-iris.yml index 7ad46c464ed..05aaf2c9541 100644 --- a/src/gallium/drivers/virgl/ci/traces-virgl-iris.yml +++ b/src/gallium/drivers/virgl/ci/traces-virgl-iris.yml @@ -30,7 +30,7 @@ traces: checksum: 32e8b627a33ad08d416dfdb804920371 0ad/0ad-v2.trace: gl-virgl: - checksum: bf22fd7c3fc8baa7b0e9345728626d5f + checksum: 638fa405f78a6631ba829a8fc98392a6 glmark2/buffer:update-fraction=0.5:update-dispersion=0.9:columns=200:update-method=map:interleave=false-v2.trace: gl-virgl: checksum: 040232e01e394a967dc3320bb9252870 @@ -42,7 +42,7 @@ traces: checksum: df21895268db3ab185ae5ffa5b2d7f37 glmark2/bump:bump-render=height-v2.trace: gl-virgl: - checksum: cd32f46925906c53fae747372a8f2ed8 + checksum: cceb2b8d4852b94709684b69c688638c glmark2/bump:bump-render=high-poly-v2.trace: gl-virgl: checksum: 11b7a4820b452934e6f12b57b8910a9a @@ -126,7 +126,7 @@ traces: label: [crash] gputest/pixmark-julia-fp32-v2.trace: gl-virgl: - checksum: 0aa3a82a5b849cb83436e52c4e3e95ac + checksum: fbf5e44a6f46684b84e5bb5ad6d36c67 gputest/pixmark-julia-fp64-v2.trace: gl-virgl: checksum: 1760aea00af985b8cd902128235b08f6 diff --git a/src/gallium/drivers/virgl/ci/traces-virgl.yml b/src/gallium/drivers/virgl/ci/traces-virgl.yml index 7da834d4f83..2be64e826d5 100644 --- a/src/gallium/drivers/virgl/ci/traces-virgl.yml +++ b/src/gallium/drivers/virgl/ci/traces-virgl.yml @@ -123,7 +123,7 @@ traces: label: [crash] gputest/pixmark-julia-fp32-v2.trace: gl-virgl: - checksum: 25f938c726c68c08a88193f28f7c4474 + checksum: 8b3584b1dd8f1d1bb63205564bd78e4e gputest/pixmark-julia-fp64-v2.trace: gl-virgl: checksum: 73ccaff82ea764057fb0f93f0024cf84 @@ -183,7 +183,7 @@ traces: checksum: f4af4067b37c00861fa5911e4c0a6629 supertuxkart/supertuxkart-mansion-egl-gles-v2.trace: gl-virgl: - checksum: 092e8ca38e58aaa83df2a9f0b7b8aee5 + checksum: cc7092975dd6c9064aa54cd7f18053b6 xonotic/xonotic-keybench-high-v2.trace: gl-virgl: checksum: f3b184bf8858a6ebccd09e7ca032197e