From a373b558c00093df2d8b21af2c5b84c3eb9540c5 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?=
Date: Mon, 3 Jan 2022 03:40:02 -0500
Subject: [PATCH] radeonsi: run nir_io_add_const_offset_to_base for TES/TCS as late optimizations

Other stages don't have indirect indexing, so it's always const.
Doing it here should also remove dead load_const instructions.

Acked-by: Pierre-Eric Pelloux-Prayer
Part-of:
---
 src/gallium/drivers/radeonsi/si_shader.c     | 3 ---
 src/gallium/drivers/radeonsi/si_shader_nir.c | 8 ++++++++
 2 files changed, 8 insertions(+), 3 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
index cab47793228..3344774523c 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -1477,9 +1477,6 @@ struct nir_shader *si_get_nir_shader(struct si_shader_selector *sel,
    if (progress || progress2)
       si_nir_late_opts(nir);
 
-   /* This must be done again. */
-   NIR_PASS_V(nir, nir_io_add_const_offset_to_base, nir_var_shader_in | nir_var_shader_out);
-
    /* This helps LLVM form VMEM clauses and thus get more GPU cache hits.
     * 200 is tuned for Viewperf. It should be done last.
     */
diff --git a/src/gallium/drivers/radeonsi/si_shader_nir.c b/src/gallium/drivers/radeonsi/si_shader_nir.c
index 50094e1ea3a..7e455ac2826 100644
--- a/src/gallium/drivers/radeonsi/si_shader_nir.c
+++ b/src/gallium/drivers/radeonsi/si_shader_nir.c
@@ -127,6 +127,14 @@ void si_nir_late_opts(nir_shader *nir)
       more_late_algebraic = false;
       NIR_PASS(more_late_algebraic, nir, nir_opt_algebraic_late);
       NIR_PASS_V(nir, nir_opt_constant_folding);
+
+      /* We should run this after constant folding for stages that support indirect
+       * inputs/outputs.
+       */
+      if (nir->options->support_indirect_inputs & BITFIELD_BIT(nir->info.stage) ||
+          nir->options->support_indirect_outputs & BITFIELD_BIT(nir->info.stage))
+         NIR_PASS_V(nir, nir_io_add_const_offset_to_base, nir_var_shader_in | nir_var_shader_out);
+
       NIR_PASS_V(nir, nir_copy_prop);
       NIR_PASS_V(nir, nir_opt_dce);
       NIR_PASS_V(nir, nir_opt_cse);