mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-05 18:18:06 +02:00
ac/nir_flag_smem_for_loads: call divergence analysis internally
Also don't flag more SMEM instructions (in ACO) after the last call to ac_nir_lower_mem_access_bit_sizes().

Totals from 75 (0.09% of 79839) affected shaders: (Navi48)
Instrs: 191246 -> 189960 (-0.67%)
CodeSize: 996840 -> 985976 (-1.09%)
Latency: 3066184 -> 2945500 (-3.94%)
InvThroughput: 355373 -> 353106 (-0.64%); split: -0.66%, +0.02%
SClause: 4848 -> 4699 (-3.07%)
Copies: 13827 -> 13925 (+0.71%); split: -0.07%, +0.78%
Branches: 5176 -> 5003 (-3.34%)
PreSGPRs: 6222 -> 6272 (+0.80%)
VALU: 108934 -> 108993 (+0.05%); split: -0.00%, +0.06%
SALU: 31679 -> 31210 (-1.48%); split: -1.51%, +0.03%
SMEM: 7158 -> 6739 (-5.85%)

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/37843>
This commit is contained in:
parent
c8830a1a79
commit
d0b87a0d5f
4 changed files with 11 additions and 4 deletions
|
|
@ -46,7 +46,6 @@ set_smem_access_flags(nir_builder *b, nir_intrinsic_instr *intrin, void *cb_data
|
|||
case nir_intrinsic_load_ssbo:
|
||||
case nir_intrinsic_load_global:
|
||||
case nir_intrinsic_load_global_constant:
|
||||
case nir_intrinsic_load_global_amd:
|
||||
case nir_intrinsic_load_constant:
|
||||
if (cb_data->use_llvm)
|
||||
return false;
|
||||
|
|
@ -82,6 +81,15 @@ set_smem_access_flags(nir_builder *b, nir_intrinsic_instr *intrin, void *cb_data
|
|||
bool
|
||||
ac_nir_flag_smem_for_loads(nir_shader *shader, enum amd_gfx_level gfx_level, bool use_llvm)
|
||||
{
|
||||
/* Only use the 'ignore_undef' divergence option for ACO where we can guarantee that
|
||||
* uniform phis with undef src are residing in SGPRs, and hence, indeed uniform.
|
||||
*/
|
||||
uint32_t options =
|
||||
shader->options->divergence_analysis_options | (use_llvm ? 0 : nir_divergence_ignore_undef_if_phi_srcs);
|
||||
nir_foreach_function_impl(impl, shader) {
|
||||
nir_divergence_analysis_impl(impl, (nir_divergence_options)options);
|
||||
}
|
||||
|
||||
mem_access_cb_data cb_data = {
|
||||
.gfx_level = gfx_level,
|
||||
.use_llvm = use_llvm,
|
||||
|
|
|
|||
|
|
@ -381,7 +381,6 @@ init_context(isel_context* ctx, nir_shader* shader)
|
|||
nir_divergence_analysis_impl(impl, (nir_divergence_options)options);
|
||||
|
||||
apply_nuw_to_offsets(ctx, impl);
|
||||
ac_nir_flag_smem_for_loads(shader, ctx->program->gfx_level, false);
|
||||
|
||||
if (shader->info.stage == MESA_SHADER_FRAGMENT) {
|
||||
nir_opt_load_skip_helpers_options skip_helper_options = {};
|
||||
|
|
|
|||
|
|
@ -353,7 +353,6 @@ radv_postprocess_nir(struct radv_device *device, const struct radv_graphics_stat
|
|||
};
|
||||
NIR_PASS(_, stage->nir, radv_nir_opt_tid_function, &tid_options);
|
||||
|
||||
nir_divergence_analysis(stage->nir);
|
||||
NIR_PASS(_, stage->nir, ac_nir_flag_smem_for_loads, gfx_level, use_llvm);
|
||||
|
||||
NIR_PASS(_, stage->nir, nir_lower_memory_model);
|
||||
|
|
@ -573,6 +572,8 @@ radv_postprocess_nir(struct radv_device *device, const struct radv_graphics_stat
|
|||
NIR_PASS(_, stage->nir, nir_opt_constant_folding);
|
||||
NIR_PASS(_, stage->nir, nir_opt_cse);
|
||||
NIR_PASS(_, stage->nir, nir_opt_shrink_vectors, true);
|
||||
|
||||
NIR_PASS(_, stage->nir, ac_nir_flag_smem_for_loads, gfx_level, use_llvm);
|
||||
NIR_PASS(_, stage->nir, ac_nir_lower_mem_access_bit_sizes, gfx_level, use_llvm);
|
||||
|
||||
nir_load_store_vectorize_options late_vectorize_opts = {
|
||||
|
|
|
|||
|
|
@ -1624,7 +1624,6 @@ static void run_late_optimization_and_lowering_passes(struct si_nir_shader_ctx *
|
|||
NIR_PASS(_, nir, nir_clear_shared_memory, shared_size, chunk_size);
|
||||
}
|
||||
|
||||
nir_divergence_analysis(nir); /* required by ac_nir_flag_smem_for_loads */
|
||||
/* This is required by ac_nir_scalarize_overfetching_loads_callback. */
|
||||
NIR_PASS(progress, nir, ac_nir_flag_smem_for_loads, sel->screen->info.gfx_level,
|
||||
!sel->info.base.use_aco_amd);
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue