mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-21 13:40:16 +01:00
radeonsi: do opt_large_constants & lower_indirect_derefs after uniform inlining
This is done because loop unrolling caused by uniform inlining can eliminate large constants and indirect derefs. Acked-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/14266>
This commit is contained in:
parent
198ad7e4dc
commit
3fb77ef2e0
2 changed files with 34 additions and 25 deletions
|
|
@ -23,9 +23,10 @@
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#include "ac_exp_param.h"
|
#include "ac_exp_param.h"
|
||||||
|
#include "ac_nir.h"
|
||||||
#include "ac_rtld.h"
|
#include "ac_rtld.h"
|
||||||
#include "compiler/nir/nir.h"
|
#include "nir.h"
|
||||||
#include "compiler/nir/nir_serialize.h"
|
#include "nir_serialize.h"
|
||||||
#include "si_pipe.h"
|
#include "si_pipe.h"
|
||||||
#include "si_shader_internal.h"
|
#include "si_shader_internal.h"
|
||||||
#include "sid.h"
|
#include "sid.h"
|
||||||
|
|
@ -1386,6 +1387,8 @@ struct nir_shader *si_get_nir_shader(struct si_shader_selector *sel,
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool progress = false;
|
||||||
|
|
||||||
bool inline_uniforms = false;
|
bool inline_uniforms = false;
|
||||||
uint32_t *inlined_uniform_values;
|
uint32_t *inlined_uniform_values;
|
||||||
si_get_inline_uniform_state((union si_shader_key*)key, sel->pipe_shader_type,
|
si_get_inline_uniform_state((union si_shader_key*)key, sel->pipe_shader_type,
|
||||||
|
|
@ -1437,14 +1440,37 @@ struct nir_shader *si_get_nir_shader(struct si_shader_selector *sel,
|
||||||
nir->info.num_inlinable_uniforms,
|
nir->info.num_inlinable_uniforms,
|
||||||
inlined_uniform_values,
|
inlined_uniform_values,
|
||||||
nir->info.inlinable_uniform_dw_offsets);
|
nir->info.inlinable_uniform_dw_offsets);
|
||||||
|
progress = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (progress)
|
||||||
si_nir_opts(sel->screen, nir, true);
|
si_nir_opts(sel->screen, nir, true);
|
||||||
|
|
||||||
|
/* Lower large variables that are always constant with load_constant intrinsics, which
|
||||||
|
* get turned into PC-relative loads from a data section next to the shader.
|
||||||
|
*
|
||||||
|
* Loop unrolling caused by uniform inlining can help eliminate indirect indexing, so
|
||||||
|
* this should be done after that.
|
||||||
|
*
|
||||||
|
* The pass crashes if there are dead temps of lowered IO interface types, so remove
|
||||||
|
* them first.
|
||||||
|
*/
|
||||||
|
bool progress2 = false;
|
||||||
|
NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_function_temp, NULL);
|
||||||
|
NIR_PASS(progress2, nir, nir_opt_large_constants, glsl_get_natural_size_align_bytes, 16);
|
||||||
|
|
||||||
|
/* Loop unrolling caused by uniform inlining can help eliminate indirect indexing, so
|
||||||
|
* this should be done after that.
|
||||||
|
*/
|
||||||
|
progress2 |= ac_nir_lower_indirect_derefs(nir, sel->screen->info.chip_class);
|
||||||
|
if (progress2)
|
||||||
|
si_nir_opts(sel->screen, nir, false);
|
||||||
|
|
||||||
|
if (progress || progress2)
|
||||||
si_nir_late_opts(nir);
|
si_nir_late_opts(nir);
|
||||||
|
|
||||||
/* This must be done again. */
|
/* This must be done again. */
|
||||||
NIR_PASS_V(nir, nir_io_add_const_offset_to_base, nir_var_shader_in |
|
NIR_PASS_V(nir, nir_io_add_const_offset_to_base, nir_var_shader_in | nir_var_shader_out);
|
||||||
nir_var_shader_out);
|
|
||||||
}
|
|
||||||
|
|
||||||
/* This helps LLVM form VMEM clauses and thus get more GPU cache hits.
|
/* This helps LLVM form VMEM clauses and thus get more GPU cache hits.
|
||||||
* 200 is tuned for Viewperf. It should be done last.
|
* 200 is tuned for Viewperf. It should be done last.
|
||||||
|
|
|
||||||
|
|
@ -23,7 +23,6 @@
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#include "ac_nir_to_llvm.h"
|
#include "ac_nir_to_llvm.h"
|
||||||
#include "ac_nir.h"
|
|
||||||
#include "compiler/nir/nir.h"
|
#include "compiler/nir/nir.h"
|
||||||
#include "compiler/nir/nir_builder.h"
|
#include "compiler/nir/nir_builder.h"
|
||||||
#include "compiler/nir/nir_deref.h"
|
#include "compiler/nir/nir_deref.h"
|
||||||
|
|
@ -412,6 +411,8 @@ static void scan_instruction(const struct nir_shader *nir, struct si_shader_info
|
||||||
break;
|
break;
|
||||||
case nir_intrinsic_load_deref:
|
case nir_intrinsic_load_deref:
|
||||||
case nir_intrinsic_store_deref:
|
case nir_intrinsic_store_deref:
|
||||||
|
/* These can only occur if there is indirect temp indexing. */
|
||||||
|
break;
|
||||||
case nir_intrinsic_interp_deref_at_centroid:
|
case nir_intrinsic_interp_deref_at_centroid:
|
||||||
case nir_intrinsic_interp_deref_at_sample:
|
case nir_intrinsic_interp_deref_at_sample:
|
||||||
case nir_intrinsic_interp_deref_at_offset:
|
case nir_intrinsic_interp_deref_at_offset:
|
||||||
|
|
@ -936,24 +937,6 @@ static void si_lower_nir(struct si_screen *sscreen, struct nir_shader *nir)
|
||||||
}
|
}
|
||||||
|
|
||||||
si_nir_opts(sscreen, nir, true);
|
si_nir_opts(sscreen, nir, true);
|
||||||
|
|
||||||
/* Lower large variables that are always constant with load_constant
|
|
||||||
* intrinsics, which get turned into PC-relative loads from a data
|
|
||||||
* section next to the shader.
|
|
||||||
*
|
|
||||||
* st/mesa calls finalize_nir twice, but we can't call this pass twice.
|
|
||||||
*/
|
|
||||||
bool changed = false;
|
|
||||||
if (!nir->constant_data) {
|
|
||||||
/* The pass crashes if there are dead temps of lowered IO interface types. */
|
|
||||||
NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_function_temp, NULL);
|
|
||||||
NIR_PASS(changed, nir, nir_opt_large_constants, glsl_get_natural_size_align_bytes, 16);
|
|
||||||
}
|
|
||||||
|
|
||||||
changed |= ac_nir_lower_indirect_derefs(nir, sscreen->info.chip_class);
|
|
||||||
if (changed)
|
|
||||||
si_nir_opts(sscreen, nir, false);
|
|
||||||
|
|
||||||
/* Run late optimizations to fuse ffma and eliminate 16-bit conversions. */
|
/* Run late optimizations to fuse ffma and eliminate 16-bit conversions. */
|
||||||
si_nir_late_opts(nir);
|
si_nir_late_opts(nir);
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue