ac/nir_flag_smem_for_loads: call divergence analysis internally
Some checks are pending
macOS-CI / macOS-CI (dri) (push) Waiting to run
macOS-CI / macOS-CI (xlib) (push) Waiting to run

Also, no longer flag additional SMEM instructions (in ACO) after the last
call to ac_nir_lower_mem_access_bit_sizes().

Totals from 75 (0.09% of 79839) affected shaders: (Navi48)

Instrs: 191246 -> 189960 (-0.67%)
CodeSize: 996840 -> 985976 (-1.09%)
Latency: 3066184 -> 2945500 (-3.94%)
InvThroughput: 355373 -> 353106 (-0.64%); split: -0.66%, +0.02%
SClause: 4848 -> 4699 (-3.07%)
Copies: 13827 -> 13925 (+0.71%); split: -0.07%, +0.78%
Branches: 5176 -> 5003 (-3.34%)
PreSGPRs: 6222 -> 6272 (+0.80%)
VALU: 108934 -> 108993 (+0.05%); split: -0.00%, +0.06%
SALU: 31679 -> 31210 (-1.48%); split: -1.51%, +0.03%
SMEM: 7158 -> 6739 (-5.85%)
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/37843>
This commit is contained in:
Daniel Schürmann 2025-10-10 15:02:45 +02:00 committed by Marge Bot
parent c8830a1a79
commit d0b87a0d5f
4 changed files with 11 additions and 4 deletions

View file

@ -46,7 +46,6 @@ set_smem_access_flags(nir_builder *b, nir_intrinsic_instr *intrin, void *cb_data
case nir_intrinsic_load_ssbo:
case nir_intrinsic_load_global:
case nir_intrinsic_load_global_constant:
case nir_intrinsic_load_global_amd:
case nir_intrinsic_load_constant:
if (cb_data->use_llvm)
return false;
@ -82,6 +81,15 @@ set_smem_access_flags(nir_builder *b, nir_intrinsic_instr *intrin, void *cb_data
bool
ac_nir_flag_smem_for_loads(nir_shader *shader, enum amd_gfx_level gfx_level, bool use_llvm)
{
/* Only use the nir_divergence_ignore_undef_if_phi_srcs divergence option for ACO,
* where we can guarantee that uniform phis with undef sources reside in SGPRs
* and hence are indeed uniform.
*/
uint32_t options =
shader->options->divergence_analysis_options | (use_llvm ? 0 : nir_divergence_ignore_undef_if_phi_srcs);
nir_foreach_function_impl(impl, shader) {
nir_divergence_analysis_impl(impl, (nir_divergence_options)options);
}
mem_access_cb_data cb_data = {
.gfx_level = gfx_level,
.use_llvm = use_llvm,

View file

@ -381,7 +381,6 @@ init_context(isel_context* ctx, nir_shader* shader)
nir_divergence_analysis_impl(impl, (nir_divergence_options)options);
apply_nuw_to_offsets(ctx, impl);
ac_nir_flag_smem_for_loads(shader, ctx->program->gfx_level, false);
if (shader->info.stage == MESA_SHADER_FRAGMENT) {
nir_opt_load_skip_helpers_options skip_helper_options = {};

View file

@ -353,7 +353,6 @@ radv_postprocess_nir(struct radv_device *device, const struct radv_graphics_stat
};
NIR_PASS(_, stage->nir, radv_nir_opt_tid_function, &tid_options);
nir_divergence_analysis(stage->nir);
NIR_PASS(_, stage->nir, ac_nir_flag_smem_for_loads, gfx_level, use_llvm);
NIR_PASS(_, stage->nir, nir_lower_memory_model);
@ -573,6 +572,8 @@ radv_postprocess_nir(struct radv_device *device, const struct radv_graphics_stat
NIR_PASS(_, stage->nir, nir_opt_constant_folding);
NIR_PASS(_, stage->nir, nir_opt_cse);
NIR_PASS(_, stage->nir, nir_opt_shrink_vectors, true);
NIR_PASS(_, stage->nir, ac_nir_flag_smem_for_loads, gfx_level, use_llvm);
NIR_PASS(_, stage->nir, ac_nir_lower_mem_access_bit_sizes, gfx_level, use_llvm);
nir_load_store_vectorize_options late_vectorize_opts = {

View file

@ -1624,7 +1624,6 @@ static void run_late_optimization_and_lowering_passes(struct si_nir_shader_ctx *
NIR_PASS(_, nir, nir_clear_shared_memory, shared_size, chunk_size);
}
nir_divergence_analysis(nir); /* required by ac_nir_flag_smem_for_loads */
/* This is required by ac_nir_scalarize_overfetching_loads_callback. */
NIR_PASS(progress, nir, ac_nir_flag_smem_for_loads, sel->screen->info.gfx_level,
!sel->info.base.use_aco_amd);