mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-25 15:10:10 +01:00
amd: optimize atomics before lowering intrinsics
ac_nir_lower_intrinsics_to_args will lower most system values. I have to keep the divergence analysis in ACO, otherwise it goes haywire. Reviewed-by: Timur Kristóf <timur.kristof@gmail.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/32782>
This commit is contained in:
parent
d30c55abf1
commit
7fbca998b1
5 changed files with 28 additions and 7 deletions
|
|
@ -2208,3 +2208,15 @@ ac_nir_lower_mem_access_bit_sizes(nir_shader *shader, enum amd_gfx_level gfx_lev
|
|||
};
|
||||
return nir_lower_mem_access_bit_sizes(shader, &lower_mem_access_options);
|
||||
}
|
||||
|
||||
bool
|
||||
ac_nir_optimize_uniform_atomics(nir_shader *nir)
|
||||
{
|
||||
bool progress = false;
|
||||
NIR_PASS(progress, nir, ac_nir_opt_shared_append);
|
||||
|
||||
nir_divergence_analysis(nir);
|
||||
NIR_PASS(progress, nir, nir_opt_uniform_atomics, false);
|
||||
|
||||
return progress;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -344,6 +344,9 @@ ac_nir_flag_smem_for_loads(nir_shader *shader, enum amd_gfx_level gfx_level, boo
|
|||
bool
|
||||
ac_nir_lower_mem_access_bit_sizes(nir_shader *shader, enum amd_gfx_level gfx_level, bool use_llvm);
|
||||
|
||||
bool
|
||||
ac_nir_optimize_uniform_atomics(nir_shader *nir);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
|
|
|||
|
|
@ -364,16 +364,10 @@ init_context(isel_context* ctx, nir_shader* shader)
|
|||
ctx->ub_config.max_workgroup_size[1] = 1024;
|
||||
ctx->ub_config.max_workgroup_size[2] = 1024;
|
||||
|
||||
ac_nir_opt_shared_append(shader);
|
||||
|
||||
uint32_t options =
|
||||
shader->options->divergence_analysis_options | nir_divergence_ignore_undef_if_phi_srcs;
|
||||
nir_divergence_analysis_impl(impl, (nir_divergence_options)options);
|
||||
shader->info.divergence_analysis_run = true;
|
||||
if (nir_opt_uniform_atomics(shader, false)) {
|
||||
nir_lower_int64(shader);
|
||||
nir_divergence_analysis_impl(impl, (nir_divergence_options)options);
|
||||
}
|
||||
|
||||
apply_nuw_to_offsets(ctx, impl);
|
||||
ac_nir_flag_smem_for_loads(shader, ctx->program->gfx_level, false, true);
|
||||
|
|
|
|||
|
|
@ -505,6 +505,12 @@ radv_postprocess_nir(struct radv_device *device, const struct radv_graphics_stat
|
|||
NIR_PASS(_, stage->nir, nir_clear_shared_memory, shared_size, chunk_size);
|
||||
}
|
||||
|
||||
/* This must be after lowering resources to descriptor loads and before lowering intrinsics
|
||||
* to args and lowering int64.
|
||||
*/
|
||||
if (!radv_use_llvm_for_stage(pdev, stage->stage))
|
||||
ac_nir_optimize_uniform_atomics(stage->nir);
|
||||
|
||||
NIR_PASS(_, stage->nir, nir_lower_int64);
|
||||
|
||||
NIR_PASS(_, stage->nir, nir_opt_idiv_const, 8);
|
||||
|
|
|
|||
|
|
@ -2582,7 +2582,6 @@ static struct nir_shader *si_get_nir_shader(struct si_shader *shader, struct si_
|
|||
});
|
||||
|
||||
NIR_PASS(progress, nir, nir_lower_pack);
|
||||
NIR_PASS(progress, nir, nir_lower_int64);
|
||||
NIR_PASS(progress, nir, nir_opt_idiv_const, 8);
|
||||
NIR_PASS(progress, nir, nir_lower_idiv,
|
||||
&(nir_lower_idiv_options){
|
||||
|
|
@ -2635,6 +2634,13 @@ static struct nir_shader *si_get_nir_shader(struct si_shader *shader, struct si_
|
|||
/* This must be after vectorization because it causes bindings_different_restrict() to fail. */
|
||||
NIR_PASS(progress, nir, si_nir_lower_resource, shader, args);
|
||||
|
||||
/* This must be after lowering resources to descriptor loads and before lowering intrinsics
|
||||
* to args and lowering int64.
|
||||
*/
|
||||
if (nir->info.use_aco_amd)
|
||||
progress |= ac_nir_optimize_uniform_atomics(nir);
|
||||
|
||||
NIR_PASS(progress, nir, nir_lower_int64);
|
||||
NIR_PASS(progress, nir, si_nir_lower_abi, shader, args);
|
||||
NIR_PASS(progress, nir, ac_nir_lower_intrinsics_to_args, sel->screen->info.gfx_level,
|
||||
sel->screen->info.has_ls_vgpr_init_bug,
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue