diff --git a/.gitlab-ci/traces-freedreno.yml b/.gitlab-ci/traces-freedreno.yml index 163833af111..daa72ee01a1 100644 --- a/.gitlab-ci/traces-freedreno.yml +++ b/.gitlab-ci/traces-freedreno.yml @@ -11,12 +11,12 @@ traces: - path: gputest/furmark.trace expectations: - device: freedreno-a630 - checksum: de674022e53fc9e0a9eb217f8bf0fe03 + checksum: af6e1faf11407a7e7c416f2c532de029 # Note: Requires GL3.3 - path: gputest/gimark.trace expectations: - device: freedreno-a630 - checksum: 2cae8e2104356e2b3017cbd953cf7b4a + checksum: 47419914b87422b267e20b6981a7eb43 - path: gputest/pixmark-julia-fp32.trace expectations: - device: freedreno-a630 @@ -37,16 +37,16 @@ traces: expectations: # Looks fine, but totally different shape from the rendering on i965. - device: freedreno-a630 - checksum: 86d678c70b8adf27095ace1a6bbfe2d2 + checksum: 9ee5a036510be0f506705eacc1516bf3 - path: gputest/plot3d.trace expectations: - device: freedreno-a630 - checksum: 67a9eb692e694b11107860bbcd47d493 + checksum: 42aba3ab943dae2fe952cae1ff91c354 # Note: Requires GL4 for tess. 
- path: gputest/tessmark.trace expectations: - device: freedreno-a630 - checksum: 985e231b58b7dc4b6da34ff32f8ebb82 + checksum: 8688b3904b6b2bc591d8b669ecae4d53 - path: gputest/triangle.trace expectations: - device: freedreno-a630 @@ -149,7 +149,7 @@ traces: - path: glmark2/effect2d-kernel=1,1,1,1,1;1,1,1,1,1;1,1,1,1,1;.rdc expectations: - device: freedreno-a630 - checksum: 2346a6597f4d1f20b493e8d6a8f7e592 + checksum: 2964d37446db126a5fe462b1ba4542cd - path: glmark2/function-fragment-complexity=low:fragment-steps=5.rdc expectations: # Incorrect rendering, a bunch of the area is uniform gray when it should @@ -215,7 +215,7 @@ traces: - path: glmark2/shading-shading=gouraud.rdc expectations: - device: freedreno-a630 - checksum: fcc26fca31375b216382e69bc5f113fb + checksum: bd9058f041bd2d59c039cccdb7d50bf7 - path: glmark2/shading-shading=phong.rdc # Some speckling on the main specular highlight that may just be # mediump artifacts @@ -226,11 +226,6 @@ traces: expectations: - device: freedreno-a630 checksum: d8b5931669733240797f1acf5d98db25 - # Very yellow terrain compared to i965, may just be mediump artifacts. 
- - path: glmark2/terrain.rdc - expectations: - - device: freedreno-a630 - checksum: 114f7dfe97768d9c565a29f656c8f9cf - path: glmark2/texture-texture-filter=linear.rdc expectations: - device: freedreno-a630 diff --git a/.gitlab-ci/traces-radeonsi.yml b/.gitlab-ci/traces-radeonsi.yml index 111fc635e36..13b1da593ce 100644 --- a/.gitlab-ci/traces-radeonsi.yml +++ b/.gitlab-ci/traces-radeonsi.yml @@ -33,11 +33,11 @@ traces: - path: gputest/furmark.trace expectations: - device: gl-radeonsi-stoney - checksum: 1c569668d608c644f353caa177d577c6 + checksum: d71c0d8e6c46c8f29d1aa8d0ed7d3c87 - path: gputest/pixmark-piano.trace expectations: - device: gl-radeonsi-stoney - checksum: a0e1d6358f76666603b08eab383af080 + checksum: 777d48e82cabceef6d9489189f91d266 - path: gputest/triangle.trace expectations: - device: gl-radeonsi-stoney @@ -153,7 +153,7 @@ traces: - path: glmark2/shadow.rdc expectations: - device: gl-radeonsi-stoney - checksum: 4bf5ca9ce641de1031eb8125d80a3005 + checksum: 03dfbf026a0f0ab643e5a6ef19623e81 - path: glmark2/terrain.rdc expectations: - device: gl-radeonsi-stoney @@ -173,7 +173,7 @@ traces: - path: godot/Material Testers.x86_64_2020.04.08_13.38_frame799.rdc expectations: - device: gl-radeonsi-stoney - checksum: 5164e238381e7d77a64e3de771cc005f + checksum: 990abd360dc380c95ee2645f8b402d47 - path: gputest/gimark.trace expectations: - device: gl-radeonsi-stoney @@ -189,15 +189,15 @@ traces: - path: gputest/pixmark-piano.trace expectations: - device: gl-radeonsi-stoney - checksum: a0e1d6358f76666603b08eab383af080 + checksum: 777d48e82cabceef6d9489189f91d266 - path: gputest/pixmark-volplosion.trace expectations: - device: gl-radeonsi-stoney - checksum: 2fba173643c014bcfa4b31eb55a514b9 + checksum: 708f92a8ac8aef23a4a544cc5ec755d6 - path: gputest/plot3d.trace expectations: - device: gl-radeonsi-stoney - checksum: fd367551aa74e2903e0590a893da01a6 + checksum: f9e6c1cb70add69cf2a4724800d48b25 - path: gputest/tessmark.trace expectations: - device: gl-radeonsi-stoney 
@@ -229,7 +229,7 @@ traces: - path: supertuxkart/supertuxkart-antediluvian-abyss.rdc expectations: - device: gl-radeonsi-stoney - checksum: 17f4039392a65ad23133cb2cac82dba4 + checksum: a2c4c127873f93b7db4ef48ea9fb7689 - path: supertuxkart/supertuxkart-menu.rdc expectations: - device: gl-radeonsi-stoney @@ -237,4 +237,4 @@ traces: - path: supertuxkart/supertuxkart-ravenbridge-mansion.rdc expectations: - device: gl-radeonsi-stoney - checksum: 46f08af5c49d711b41d4082f8a5cf6d6 + checksum: c8f9eae92c67c7d53db4d69a703e3914 diff --git a/src/compiler/nir/nir_opt_algebraic.py b/src/compiler/nir/nir_opt_algebraic.py index f2ef598c912..39c07ce0b7f 100644 --- a/src/compiler/nir/nir_opt_algebraic.py +++ b/src/compiler/nir/nir_opt_algebraic.py @@ -194,7 +194,8 @@ optimizations.extend([ (('ffract', a), ('fsub', a, ('ffloor', a)), 'options->lower_ffract'), (('fceil', a), ('fneg', ('ffloor', ('fneg', a))), 'options->lower_fceil'), (('ffma', a, b, c), ('fadd', ('fmul', a, b), c), 'options->lower_ffma'), - (('~fadd', ('fmul', a, b), c), ('ffma', a, b, c), 'options->fuse_ffma'), + # When fuse_ffma is set, lower inexact ffma even without lower_ffma, because it will be fused back by late optimizations (nir_opt_algebraic_late). + (('~ffma', a, b, c), ('fadd', ('fmul', a, b), c), 'options->fuse_ffma'), (('~fmul', ('fadd', ('iand', ('ineg', ('b2i', 'a@bool')), ('fmul', b, c)), '#d'), '#e'), ('bcsel', a, ('fmul', ('fadd', ('fmul', b, c), d), e), ('fmul', d, e))), @@ -2027,6 +2028,7 @@ late_optimizations = [ (('fneg', a), ('fsub', 0.0, a), 'options->lower_negate'), (('ineg', a), ('isub', 0, a), 'options->lower_negate'), (('iabs', a), ('imax', a, ('ineg', a)), 'options->lower_iabs'), + (('~fadd', ('fmul', a, b), c), ('ffma', a, b, c), 'options->fuse_ffma'), # These are duplicated from the main optimizations table. 
The late # patterns that rearrange expressions like x - .5 < 0 to x < .5 can create diff --git a/src/gallium/drivers/radeonsi/si_shader_nir.c b/src/gallium/drivers/radeonsi/si_shader_nir.c index 534973bcf49..4b879bff13a 100644 --- a/src/gallium/drivers/radeonsi/si_shader_nir.c +++ b/src/gallium/drivers/radeonsi/si_shader_nir.c @@ -698,6 +698,17 @@ static void si_lower_nir(struct si_screen *sscreen, struct nir_shader *nir) if (changed) si_nir_opts(nir, false); + /* Run late optimizations to fuse ffma. */ + bool more_late_algebraic = true; + while (more_late_algebraic) { + more_late_algebraic = false; + NIR_PASS(more_late_algebraic, nir, nir_opt_algebraic_late); + NIR_PASS_V(nir, nir_opt_constant_folding); + NIR_PASS_V(nir, nir_copy_prop); + NIR_PASS_V(nir, nir_opt_dce); + NIR_PASS_V(nir, nir_opt_cse); + } + NIR_PASS_V(nir, nir_lower_bool_to_int32); NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_function_temp, NULL);