From e7a4e97076a259bbbea9b02cc804494a8eef9fd8 Mon Sep 17 00:00:00 2001 From: Iago Toral Quiroga Date: Wed, 2 Mar 2022 12:15:15 +0100 Subject: [PATCH] nir/schedule: use larger delay for non-filtered memory reads MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This has been pending for a long time. It is not very consistent to add a significant delay for textures and not do it for UBOs, etc. The reason we have not been doing this so far is the accumulated effect on register pressure for V3D as shown by shader-db results below, but from the point of view of a generic scheduler it makes sense to do this. Later patches will address V3D-specific issues with register pressure derived from this by letting the driver control its instruction delay settings. total instructions in shared programs: 12662138 -> 13126587 (3.67%) instructions in affected programs: 1813091 -> 2277540 (25.62%) helped: 2410 HURT: 10499 total threads in shared programs: 415858 -> 407208 (-2.08%) threads in affected programs: 17348 -> 8698 (-49.86%) helped: 8 HURT: 4333 total uniforms in shared programs: 3711483 -> 3812698 (2.73%) uniforms in affected programs: 128012 -> 229227 (79.07%) helped: 3474 HURT: 2143 total max-temps in shared programs: 2138763 -> 2318430 (8.40%) max-temps in affected programs: 318780 -> 498447 (56.36%) helped: 588 HURT: 11997 total spills in shared programs: 3860 -> 49086 (1171.66%) spills in affected programs: 709 -> 45935 (6378.84%) helped: 23 HURT: 1595 total fills in shared programs: 5573 -> 55810 (901.44%) fills in affected programs: 1067 -> 51304 (4708.25%) helped: 23 HURT: 1595 LOST: 3 GAINED: 0 Reviewed-by: Alejandro Piñeiro Part-of: --- src/broadcom/compiler/nir_to_vir.c | 2 +- src/compiler/nir/nir_schedule.c | 13 +++++++++++-- 2 files changed, 12 insertions(+), 3 deletions(-) diff --git a/src/broadcom/compiler/nir_to_vir.c b/src/broadcom/compiler/nir_to_vir.c index 28559048468..7125c35aba4 100644 --- 
a/src/broadcom/compiler/nir_to_vir.c +++ b/src/broadcom/compiler/nir_to_vir.c @@ -2158,7 +2158,7 @@ v3d_optimize_nir(struct v3d_compile *c, struct nir_shader *s) nir_move_options sink_opts = nir_move_const_undef | nir_move_comparisons | nir_move_copies | - nir_move_load_ubo | nir_move_load_uniform; + nir_move_load_ubo | nir_move_load_ssbo | nir_move_load_uniform; NIR_PASS(progress, s, nir_opt_sink, sink_opts); } diff --git a/src/compiler/nir/nir_schedule.c b/src/compiler/nir/nir_schedule.c index 025a6d722ed..d9aca42f9c5 100644 --- a/src/compiler/nir/nir_schedule.c +++ b/src/compiler/nir/nir_schedule.c @@ -1016,8 +1016,17 @@ nir_schedule_get_delay(nir_instr *instr) return 1; case nir_instr_type_intrinsic: - /* XXX: Pick a large number for UBO/SSBO/image/shared loads */ - return 1; + switch (nir_instr_as_intrinsic(instr)->intrinsic) { + case nir_intrinsic_load_ubo: + case nir_intrinsic_load_ssbo: + case nir_intrinsic_load_scratch: + case nir_intrinsic_load_shared: + case nir_intrinsic_image_load: + return 50; + default: + return 1; + } + break; case nir_instr_type_tex: /* Pick some large number to try to fetch textures early and sample them