mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-07 07:08:04 +02:00
radv: call radv_optimize_nir after lowering io
We are moving more and more passes after lowering io because they no longer operate on variables. There doesn't really seem to be a need to optimize before lowering io, so do it afterwards.

Foz-DB Navi48:
Totals from 2339 (1.14% of 205045) affected shaders:
MaxWaves: 66218 -> 66258 (+0.06%)
Instrs: 2009510 -> 2007711 (-0.09%); split: -0.15%, +0.06%
CodeSize: 10646476 -> 10648376 (+0.02%); split: -0.05%, +0.07%
VGPRs: 131304 -> 131232 (-0.05%)
Latency: 19249976 -> 19248715 (-0.01%); split: -0.02%, +0.02%
InvThroughput: 3133252 -> 3132291 (-0.03%); split: -0.05%, +0.02%
VClause: 32999 -> 33003 (+0.01%); split: -0.07%, +0.08%
SClause: 42959 -> 43101 (+0.33%); split: -0.27%, +0.60%
Copies: 143721 -> 143792 (+0.05%); split: -0.38%, +0.43%
Branches: 38736 -> 38738 (+0.01%)
PreSGPRs: 106104 -> 105846 (-0.24%); split: -0.27%, +0.03%
PreVGPRs: 95217 -> 95179 (-0.04%); split: -0.04%, +0.00%
VALU: 1146620 -> 1144783 (-0.16%); split: -0.22%, +0.06%
SALU: 275263 -> 275183 (-0.03%); split: -0.16%, +0.13%
VMEM: 58353 -> 58364 (+0.02%)
SMEM: 84810 -> 85215 (+0.48%); split: -0.00%, +0.48%

Reviewed-by: Marek Olšák <marek.olsak@amd.com>
Reviewed-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/40928>
This commit is contained in:
parent
4598bbaea7
commit
cbeccb0a09
2 changed files with 24 additions and 30 deletions
|
|
@@ -169,11 +169,6 @@ radv_nir_lower_io(struct radv_device *device, nir_shader *nir)
|
|||
if (nir->info.stage == MESA_SHADER_FRAGMENT) {
|
||||
/* Lower explicit input load intrinsics to sysvals for the layer ID. */
|
||||
NIR_PASS(_, nir, nir_lower_system_values);
|
||||
|
||||
/* Recompute FS input intrinsic bases to assign a location to each FS input.
|
||||
* The computed base will match the index of each input in SPI_PS_INPUT_CNTL_n.
|
||||
*/
|
||||
radv_recompute_fs_input_bases(nir);
|
||||
}
|
||||
|
||||
NIR_PASS(_, nir, nir_opt_dce);
|
||||
|
|
|
|||
|
|
@@ -2806,38 +2806,34 @@ radv_graphics_shaders_compile(struct radv_device *device, struct vk_pipeline_cac
|
|||
radv_foreach_stage (i, active_nir_stages) {
|
||||
int64_t stage_start = os_time_get_nano();
|
||||
|
||||
radv_optimize_nir(stages[i].nir, stages[i].key.optimisations_disabled);
|
||||
|
||||
radv_nir_lower_io(device, stages[i].nir);
|
||||
|
||||
stages[i].feedback.duration += os_time_get_nano() - stage_start;
|
||||
}
|
||||
|
||||
if (stages[MESA_SHADER_FRAGMENT].nir) {
|
||||
bool update_info = false;
|
||||
if (gfx_state->dynamic_line_rast_mode)
|
||||
NIR_PASS(update_info, stages[MESA_SHADER_FRAGMENT].nir, nir_lower_poly_line_smooth,
|
||||
RADV_NUM_SMOOTH_AA_SAMPLES);
|
||||
NIR_PASS(_, stages[MESA_SHADER_FRAGMENT].nir, nir_lower_poly_line_smooth, RADV_NUM_SMOOTH_AA_SAMPLES);
|
||||
|
||||
if (!gfx_state->ps.has_epilog) {
|
||||
NIR_PASS(update_info, stages[MESA_SHADER_FRAGMENT].nir, radv_nir_remap_color_attachment, gfx_state);
|
||||
NIR_PASS(_, stages[MESA_SHADER_FRAGMENT].nir, radv_nir_remap_color_attachment, gfx_state);
|
||||
|
||||
/* Lower FS outputs to scalar to allow dce. */
|
||||
NIR_PASS(_, stages[MESA_SHADER_FRAGMENT].nir, nir_lower_io_to_scalar, nir_var_shader_out, NULL, NULL);
|
||||
|
||||
NIR_PASS(update_info, stages[MESA_SHADER_FRAGMENT].nir, radv_nir_trim_fs_color_exports, &gfx_state->ps.epilog);
|
||||
NIR_PASS(_, stages[MESA_SHADER_FRAGMENT].nir, radv_nir_trim_fs_color_exports, &gfx_state->ps.epilog);
|
||||
|
||||
NIR_PASS(update_info, stages[MESA_SHADER_FRAGMENT].nir, nir_opt_copy_prop);
|
||||
NIR_PASS(update_info, stages[MESA_SHADER_FRAGMENT].nir, nir_opt_dce);
|
||||
NIR_PASS(update_info, stages[MESA_SHADER_FRAGMENT].nir, nir_opt_dead_cf);
|
||||
NIR_PASS(_, stages[MESA_SHADER_FRAGMENT].nir, nir_opt_copy_prop);
|
||||
NIR_PASS(_, stages[MESA_SHADER_FRAGMENT].nir, nir_opt_dce);
|
||||
NIR_PASS(_, stages[MESA_SHADER_FRAGMENT].nir, nir_opt_dead_cf);
|
||||
}
|
||||
|
||||
NIR_PASS(update_info, stages[MESA_SHADER_FRAGMENT].nir, radv_nir_lower_fs_input_attachment);
|
||||
NIR_PASS(_, stages[MESA_SHADER_FRAGMENT].nir, radv_nir_lower_fs_input_attachment);
|
||||
|
||||
NIR_PASS(update_info, stages[MESA_SHADER_FRAGMENT].nir, nir_opt_frag_coord_to_pixel_coord);
|
||||
if (update_info)
|
||||
nir_shader_gather_info(stages[MESA_SHADER_FRAGMENT].nir,
|
||||
nir_shader_get_entrypoint(stages[MESA_SHADER_FRAGMENT].nir));
|
||||
NIR_PASS(_, stages[MESA_SHADER_FRAGMENT].nir, nir_opt_cse);
|
||||
NIR_PASS(_, stages[MESA_SHADER_FRAGMENT].nir, nir_opt_copy_prop);
|
||||
NIR_PASS(_, stages[MESA_SHADER_FRAGMENT].nir, nir_opt_dce);
|
||||
NIR_PASS(_, stages[MESA_SHADER_FRAGMENT].nir, nir_opt_frag_coord_to_pixel_coord);
|
||||
}
|
||||
|
||||
radv_foreach_stage (i, active_nir_stages) {
|
||||
|
|
@@ -2864,20 +2860,23 @@ radv_graphics_shaders_compile(struct radv_device *device, struct vk_pipeline_cac
|
|||
if (!remove_as_varying && !remove_as_sysval)
|
||||
continue;
|
||||
|
||||
bool progress = false;
|
||||
NIR_PASS(_, stages[i].nir, nir_remove_outputs, MESA_SHADER_FRAGMENT, remove_as_varying, remove_as_sysval);
|
||||
break;
|
||||
}
|
||||
|
||||
NIR_PASS(progress, stages[i].nir, nir_remove_outputs, MESA_SHADER_FRAGMENT, remove_as_varying, remove_as_sysval);
|
||||
radv_foreach_stage (i, active_nir_stages) {
|
||||
int64_t stage_start = os_time_get_nano();
|
||||
|
||||
if (progress) {
|
||||
/* Remove dead code resulting from removed outputs. */
|
||||
do {
|
||||
progress = false;
|
||||
NIR_PASS(progress, stages[i].nir, nir_opt_dce);
|
||||
NIR_PASS(progress, stages[i].nir, nir_opt_dead_cf);
|
||||
} while (progress);
|
||||
radv_optimize_nir(stages[i].nir, stages[i].key.optimisations_disabled);
|
||||
|
||||
if (i == MESA_SHADER_FRAGMENT) {
|
||||
/* Recompute FS input intrinsic bases to assign a location to each FS input.
|
||||
* The computed base will match the index of each input in SPI_PS_INPUT_CNTL_n.
|
||||
*/
|
||||
radv_recompute_fs_input_bases(stages[i].nir);
|
||||
}
|
||||
|
||||
break;
|
||||
stages[i].feedback.duration += os_time_get_nano() - stage_start;
|
||||
}
|
||||
|
||||
/* Optimize varyings on lowered shader I/O (more efficient than optimizing I/O derefs). */
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue