mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-21 11:30:11 +01:00
radeonsi: do opt_large_constants & lower_indirect_derefs after uniform inlining
because loop unrolling caused by uniform inlining can eliminate large constants and indirect derefs.

Acked-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/14266>
This commit is contained in:
parent
198ad7e4dc
commit
3fb77ef2e0
2 changed files with 34 additions and 25 deletions
|
|
@ -23,9 +23,10 @@
|
|||
*/
|
||||
|
||||
#include "ac_exp_param.h"
|
||||
#include "ac_nir.h"
|
||||
#include "ac_rtld.h"
|
||||
#include "compiler/nir/nir.h"
|
||||
#include "compiler/nir/nir_serialize.h"
|
||||
#include "nir.h"
|
||||
#include "nir_serialize.h"
|
||||
#include "si_pipe.h"
|
||||
#include "si_shader_internal.h"
|
||||
#include "sid.h"
|
||||
|
|
@ -1386,6 +1387,8 @@ struct nir_shader *si_get_nir_shader(struct si_shader_selector *sel,
|
|||
return NULL;
|
||||
}
|
||||
|
||||
bool progress = false;
|
||||
|
||||
bool inline_uniforms = false;
|
||||
uint32_t *inlined_uniform_values;
|
||||
si_get_inline_uniform_state((union si_shader_key*)key, sel->pipe_shader_type,
|
||||
|
|
@ -1437,14 +1440,37 @@ struct nir_shader *si_get_nir_shader(struct si_shader_selector *sel,
|
|||
nir->info.num_inlinable_uniforms,
|
||||
inlined_uniform_values,
|
||||
nir->info.inlinable_uniform_dw_offsets);
|
||||
progress = true;
|
||||
}
|
||||
|
||||
if (progress)
|
||||
si_nir_opts(sel->screen, nir, true);
|
||||
|
||||
/* Lower large variables that are always constant with load_constant intrinsics, which
|
||||
* get turned into PC-relative loads from a data section next to the shader.
|
||||
*
|
||||
* Loop unrolling caused by uniform inlining can help eliminate indirect indexing, so
|
||||
* this should be done after that.
|
||||
*
|
||||
* The pass crashes if there are dead temps of lowered IO interface types, so remove
|
||||
* them first.
|
||||
*/
|
||||
bool progress2 = false;
|
||||
NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_function_temp, NULL);
|
||||
NIR_PASS(progress2, nir, nir_opt_large_constants, glsl_get_natural_size_align_bytes, 16);
|
||||
|
||||
/* Loop unrolling caused by uniform inlining can help eliminate indirect indexing, so
|
||||
* this should be done after that.
|
||||
*/
|
||||
progress2 |= ac_nir_lower_indirect_derefs(nir, sel->screen->info.chip_class);
|
||||
if (progress2)
|
||||
si_nir_opts(sel->screen, nir, false);
|
||||
|
||||
if (progress || progress2)
|
||||
si_nir_late_opts(nir);
|
||||
|
||||
/* This must be done again. */
|
||||
NIR_PASS_V(nir, nir_io_add_const_offset_to_base, nir_var_shader_in |
|
||||
nir_var_shader_out);
|
||||
}
|
||||
/* This must be done again. */
|
||||
NIR_PASS_V(nir, nir_io_add_const_offset_to_base, nir_var_shader_in | nir_var_shader_out);
|
||||
|
||||
/* This helps LLVM form VMEM clauses and thus get more GPU cache hits.
|
||||
* 200 is tuned for Viewperf. It should be done last.
|
||||
|
|
|
|||
|
|
@ -23,7 +23,6 @@
|
|||
*/
|
||||
|
||||
#include "ac_nir_to_llvm.h"
|
||||
#include "ac_nir.h"
|
||||
#include "compiler/nir/nir.h"
|
||||
#include "compiler/nir/nir_builder.h"
|
||||
#include "compiler/nir/nir_deref.h"
|
||||
|
|
@ -412,6 +411,8 @@ static void scan_instruction(const struct nir_shader *nir, struct si_shader_info
|
|||
break;
|
||||
case nir_intrinsic_load_deref:
|
||||
case nir_intrinsic_store_deref:
|
||||
/* These can only occur if there is indirect temp indexing. */
|
||||
break;
|
||||
case nir_intrinsic_interp_deref_at_centroid:
|
||||
case nir_intrinsic_interp_deref_at_sample:
|
||||
case nir_intrinsic_interp_deref_at_offset:
|
||||
|
|
@ -936,24 +937,6 @@ static void si_lower_nir(struct si_screen *sscreen, struct nir_shader *nir)
|
|||
}
|
||||
|
||||
si_nir_opts(sscreen, nir, true);
|
||||
|
||||
/* Lower large variables that are always constant with load_constant
|
||||
* intrinsics, which get turned into PC-relative loads from a data
|
||||
* section next to the shader.
|
||||
*
|
||||
* st/mesa calls finalize_nir twice, but we can't call this pass twice.
|
||||
*/
|
||||
bool changed = false;
|
||||
if (!nir->constant_data) {
|
||||
/* The pass crashes if there are dead temps of lowered IO interface types. */
|
||||
NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_function_temp, NULL);
|
||||
NIR_PASS(changed, nir, nir_opt_large_constants, glsl_get_natural_size_align_bytes, 16);
|
||||
}
|
||||
|
||||
changed |= ac_nir_lower_indirect_derefs(nir, sscreen->info.chip_class);
|
||||
if (changed)
|
||||
si_nir_opts(sscreen, nir, false);
|
||||
|
||||
/* Run late optimizations to fuse ffma and eliminate 16-bit conversions. */
|
||||
si_nir_late_opts(nir);
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue