mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-08 02:38:04 +02:00
nir/opt_move_to_top: add an option to exclude moving at_offset/at_sample loads
This splits the nir_move_to_top_input_loads option into 2 options: nir_move_to_top_input_loads_simple and nir_move_to_top_input_loads_complex_baryc. The latter option is mainly for at_offset/at_sample loads. Then it updates most places to use only the first option. The rationale is that moving at_sample loads out of conditional blocks makes Control (game) shaders worse, as per the code comment.
This commit is contained in:
parent
0684976de8
commit
d108ed6888
6 changed files with 49 additions and 14 deletions
|
|
@ -372,7 +372,7 @@ radv_postprocess_nir(const struct radv_compiler_info *compiler_info, const struc
|
|||
/* Always load all VS inputs at the top to eliminate needless VMEM->s_wait->VMEM sequences.
|
||||
* Each s_wait can cost 1000 cycles, so make sure all VS input loads are grouped.
|
||||
*/
|
||||
NIR_PASS(_, stage->nir, nir_opt_move_to_top, nir_move_to_top_input_loads);
|
||||
NIR_PASS(_, stage->nir, nir_opt_move_to_top, nir_move_to_top_input_loads_simple);
|
||||
NIR_PASS(_, stage->nir, nir_opt_sink, sink_opts);
|
||||
NIR_PASS(_, stage->nir, nir_opt_move, sink_opts);
|
||||
} else {
|
||||
|
|
|
|||
|
|
@ -5874,8 +5874,21 @@ typedef enum {
|
|||
nir_move_to_entry_block_only = BITFIELD_BIT(0),
|
||||
|
||||
/* Instruction options. */
|
||||
nir_move_to_top_input_loads = BITFIELD_BIT(1),
|
||||
nir_move_to_top_load_smem_amd = BITFIELD_BIT(2),
|
||||
|
||||
/* Simple input loads are non-interpolated loads and interpolated loads
|
||||
* with pixel, centroid, and sample barycentrics. Other barycentrics are
|
||||
* excluded.
|
||||
*/
|
||||
nir_move_to_top_input_loads_simple = BITFIELD_BIT(1),
|
||||
|
||||
/* Interpolated loads with non-trivial barycentrics, such as at_offset and
|
||||
* at_sample. (this option is not recommended for Control (game) because
|
||||
* it moves at_sample with complex ALU perspective-correct interpolation
|
||||
* out of conditional blocks)
|
||||
*/
|
||||
nir_move_to_top_input_loads_complex_baryc = BITFIELD_BIT(2),
|
||||
|
||||
nir_move_to_top_load_smem_amd = BITFIELD_BIT(3),
|
||||
} nir_opt_move_to_top_options;
|
||||
|
||||
bool nir_opt_move_to_top(nir_shader *nir, nir_opt_move_to_top_options options);
|
||||
|
|
|
|||
|
|
@ -12,9 +12,9 @@
|
|||
* of instructions that are moved.
|
||||
*
|
||||
* Used either as a scheduling optimization or to accommodate hw or compiler
|
||||
* backend limitations. You would typically use this if you don't use
|
||||
* nir_lower_io_vars_to_temporaries and want to move input loads to top,
|
||||
* but note that such global code motion passes often increase register usage.
|
||||
* backend limitations. It would typically be used if
|
||||
* nir_lower_io_vars_to_temporaries isn't used and it's desirable to move input
|
||||
* loads to top, but such global code motion often increases register usage.
|
||||
*/
|
||||
|
||||
#include "nir.h"
|
||||
|
|
@ -138,10 +138,29 @@ handle_load(nir_builder *b, nir_intrinsic_instr *intr, void *_state)
|
|||
* an input load. The specific intrinsics that are moved are
|
||||
* listed in can_move_src_to_top.
|
||||
*/
|
||||
move |= state->options & nir_move_to_top_input_loads &&
|
||||
nir_intrinsic_has_io_semantics(intr) &&
|
||||
nir_intrinsic_infos[intr->intrinsic].has_dest &&
|
||||
!nir_is_output_load(intr);
|
||||
if (state->options & (nir_move_to_top_input_loads_simple |
|
||||
nir_move_to_top_input_loads_complex_baryc) &&
|
||||
nir_intrinsic_has_io_semantics(intr) &&
|
||||
nir_intrinsic_infos[intr->intrinsic].has_dest &&
|
||||
!nir_is_output_load(intr)) {
|
||||
|
||||
if (intr->intrinsic == nir_intrinsic_load_interpolated_input) {
|
||||
nir_intrinsic_instr *baryc =
|
||||
nir_def_as_intrinsic_or_null(intr->src[0].ssa);
|
||||
|
||||
nir_opt_move_to_top_options baryc_option =
|
||||
baryc &&
|
||||
(baryc->intrinsic == nir_intrinsic_load_barycentric_pixel ||
|
||||
baryc->intrinsic == nir_intrinsic_load_barycentric_centroid ||
|
||||
baryc->intrinsic == nir_intrinsic_load_barycentric_sample) ?
|
||||
nir_move_to_top_input_loads_simple :
|
||||
nir_move_to_top_input_loads_complex_baryc;
|
||||
|
||||
move |= !!(state->options & baryc_option);
|
||||
} else {
|
||||
move |= !!(state->options & nir_move_to_top_input_loads_simple);
|
||||
}
|
||||
}
|
||||
|
||||
move |= state->options & nir_move_to_top_load_smem_amd &&
|
||||
(intr->intrinsic == nir_intrinsic_load_global_amd &&
|
||||
|
|
|
|||
|
|
@ -5527,7 +5527,7 @@ nir_opt_varyings_bulk(nir_shader **shaders, uint32_t num_shaders, bool spirv,
|
|||
if (nir->info.stage == MESA_SHADER_FRAGMENT) {
|
||||
NIR_PASS(_, nir, nir_opt_move_to_top,
|
||||
nir_move_to_entry_block_only |
|
||||
nir_move_to_top_input_loads);
|
||||
nir_move_to_top_input_loads_simple);
|
||||
}
|
||||
|
||||
/* nir_opt_varyings requires scalar IO. Scalarize all varyings (not just
|
||||
|
|
|
|||
|
|
@ -728,8 +728,11 @@ ir3_finalize_nir(struct ir3_compiler *compiler,
|
|||
* more optimal at the top.
|
||||
*/
|
||||
if (s->info.stage == MESA_SHADER_VERTEX ||
|
||||
s->info.stage == MESA_SHADER_FRAGMENT)
|
||||
NIR_PASS(_, s, nir_opt_move_to_top, nir_move_to_top_input_loads);
|
||||
s->info.stage == MESA_SHADER_FRAGMENT) {
|
||||
NIR_PASS(_, s, nir_opt_move_to_top,
|
||||
nir_move_to_top_input_loads_simple |
|
||||
nir_move_to_top_input_loads_complex_baryc);
|
||||
}
|
||||
|
||||
if (s->info.stage == MESA_SHADER_GEOMETRY) {
|
||||
/* nir_unlower_io_to_vars expects constant indirect offsets to be folded
|
||||
|
|
|
|||
|
|
@ -884,7 +884,7 @@ static void si_preprocess_nir(struct si_nir_shader_ctx *ctx)
|
|||
*/
|
||||
if (nir->info.stage == MESA_SHADER_VERTEX ||
|
||||
nir->info.stage == MESA_SHADER_FRAGMENT)
|
||||
NIR_PASS(progress, nir, nir_opt_move_to_top, nir_move_to_top_input_loads);
|
||||
NIR_PASS(progress, nir, nir_opt_move_to_top, nir_move_to_top_input_loads_simple);
|
||||
|
||||
/* Remove dead temps before we lower indirect indexing. */
|
||||
NIR_PASS(_, nir, nir_remove_dead_variables, nir_var_function_temp, NULL);
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue