From 1bcd848816f3c9abf40362fd3ca3ca175774094b Mon Sep 17 00:00:00 2001 From: Emma Anholt Date: Fri, 15 Apr 2022 12:08:07 -0700 Subject: [PATCH] freedreno/ir3: Call nir_opt_find_array_copies(). gfxbench vk-5-normal has a shader that samples into a texels[] array at the top, then in a loop calls a GLSL function passing texels[] in by value. This resulted in a copy to a temp inside the loop, which got lowered to scratch stores since it was pretty big. By doing find_array_copies(), we notice that it's equivalent to a copy_deref, and then get to copy-propagate from the array at the top. Then we only have to set up the scratch array outside of the loop and load_scratch from it in the called function inside the loop. This also causes there to be less spilling, stps 1144 -> 354 and ldps 826 -> 36. However, it doesn't seem to change performance on the test. So, while this seems to be an improvement for the shader, and we could maybe even do better by rematerializing the txl samples inside the loop instead of storing the texture fetches to scratch in the first place, it doesn't currently seem worth pursuing more optimization of this shader. No change on freedreno shader-db. 
Part-of: --- src/freedreno/ir3/ir3_nir.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/src/freedreno/ir3/ir3_nir.c b/src/freedreno/ir3/ir3_nir.c index 96529f35727..c08377e8ab9 100644 --- a/src/freedreno/ir3/ir3_nir.c +++ b/src/freedreno/ir3/ir3_nir.c @@ -81,8 +81,6 @@ ir3_optimize_loop(struct ir3_compiler *compiler, nir_shader *s) progress = false; OPT_V(s, nir_lower_vars_to_ssa); - progress |= OPT(s, nir_opt_copy_prop_vars); - progress |= OPT(s, nir_opt_dead_write_vars); progress |= OPT(s, nir_lower_alu_to_scalar, NULL, NULL); progress |= OPT(s, nir_lower_phis_to_scalar, false); @@ -90,6 +88,11 @@ ir3_optimize_loop(struct ir3_compiler *compiler, nir_shader *s) progress |= OPT(s, nir_opt_deref); progress |= OPT(s, nir_opt_dce); progress |= OPT(s, nir_opt_cse); + + progress |= OPT(s, nir_opt_find_array_copies); + progress |= OPT(s, nir_opt_copy_prop_vars); + progress |= OPT(s, nir_opt_dead_write_vars); + static int gcm = -1; if (gcm == -1) gcm = env_var_as_unsigned("GCM", 0); @@ -165,6 +168,8 @@ ir3_optimize_loop(struct ir3_compiler *compiler, nir_shader *s) progress |= OPT(s, nir_opt_remove_phis); progress |= OPT(s, nir_opt_undef); } while (progress); + + OPT(s, nir_lower_var_copies); } static bool