diff --git a/src/amd/common/ac_nir.c b/src/amd/common/ac_nir.c index 7bcd657b518..f66cd301bc5 100644 --- a/src/amd/common/ac_nir.c +++ b/src/amd/common/ac_nir.c @@ -2208,3 +2208,15 @@ ac_nir_lower_mem_access_bit_sizes(nir_shader *shader, enum amd_gfx_level gfx_lev }; return nir_lower_mem_access_bit_sizes(shader, &lower_mem_access_options); } + +bool +ac_nir_optimize_uniform_atomics(nir_shader *nir) +{ + bool progress = false; + NIR_PASS(progress, nir, ac_nir_opt_shared_append); + + nir_divergence_analysis(nir); + NIR_PASS(progress, nir, nir_opt_uniform_atomics, false); + + return progress; +} diff --git a/src/amd/common/ac_nir.h b/src/amd/common/ac_nir.h index a418e90a45c..e2f6497a031 100644 --- a/src/amd/common/ac_nir.h +++ b/src/amd/common/ac_nir.h @@ -344,6 +344,9 @@ ac_nir_flag_smem_for_loads(nir_shader *shader, enum amd_gfx_level gfx_level, boo bool ac_nir_lower_mem_access_bit_sizes(nir_shader *shader, enum amd_gfx_level gfx_level, bool use_llvm); +bool +ac_nir_optimize_uniform_atomics(nir_shader *nir); + #ifdef __cplusplus } #endif diff --git a/src/amd/compiler/aco_instruction_selection_setup.cpp b/src/amd/compiler/aco_instruction_selection_setup.cpp index 5f6798aae06..b9d08daa672 100644 --- a/src/amd/compiler/aco_instruction_selection_setup.cpp +++ b/src/amd/compiler/aco_instruction_selection_setup.cpp @@ -364,16 +364,10 @@ init_context(isel_context* ctx, nir_shader* shader) ctx->ub_config.max_workgroup_size[1] = 1024; ctx->ub_config.max_workgroup_size[2] = 1024; - ac_nir_opt_shared_append(shader); - uint32_t options = shader->options->divergence_analysis_options | nir_divergence_ignore_undef_if_phi_srcs; nir_divergence_analysis_impl(impl, (nir_divergence_options)options); shader->info.divergence_analysis_run = true; - if (nir_opt_uniform_atomics(shader, false)) { - nir_lower_int64(shader); - nir_divergence_analysis_impl(impl, (nir_divergence_options)options); - } apply_nuw_to_offsets(ctx, impl); ac_nir_flag_smem_for_loads(shader, ctx->program->gfx_level, false, true); diff --git a/src/amd/vulkan/radv_pipeline.c b/src/amd/vulkan/radv_pipeline.c index 95386c5839a..3c97b653687 100644 --- a/src/amd/vulkan/radv_pipeline.c +++ b/src/amd/vulkan/radv_pipeline.c @@ -505,6 +505,12 @@ radv_postprocess_nir(struct radv_device *device, const struct radv_graphics_stat NIR_PASS(_, stage->nir, nir_clear_shared_memory, shared_size, chunk_size); } + /* This must be after lowering resources to descriptor loads and before lowering intrinsics + * to args and lowering int64. + */ + if (!radv_use_llvm_for_stage(pdev, stage->stage)) + ac_nir_optimize_uniform_atomics(stage->nir); + NIR_PASS(_, stage->nir, nir_lower_int64); NIR_PASS(_, stage->nir, nir_opt_idiv_const, 8); diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c index 59271d4729a..feeeb3776b8 100644 --- a/src/gallium/drivers/radeonsi/si_shader.c +++ b/src/gallium/drivers/radeonsi/si_shader.c @@ -2582,7 +2582,6 @@ static struct nir_shader *si_get_nir_shader(struct si_shader *shader, struct si_ }); NIR_PASS(progress, nir, nir_lower_pack); - NIR_PASS(progress, nir, nir_lower_int64); NIR_PASS(progress, nir, nir_opt_idiv_const, 8); NIR_PASS(progress, nir, nir_lower_idiv, &(nir_lower_idiv_options){ @@ -2635,6 +2634,13 @@ static struct nir_shader *si_get_nir_shader(struct si_shader *shader, struct si_ /* This must be after vectorization because it causes bindings_different_restrict() to fail. */ NIR_PASS(progress, nir, si_nir_lower_resource, shader, args); + /* This must be after lowering resources to descriptor loads and before lowering intrinsics + * to args and lowering int64. + */ + if (nir->info.use_aco_amd) + progress |= ac_nir_optimize_uniform_atomics(nir); + + NIR_PASS(progress, nir, nir_lower_int64); NIR_PASS(progress, nir, si_nir_lower_abi, shader, args); NIR_PASS(progress, nir, ac_nir_lower_intrinsics_to_args, sel->screen->info.gfx_level, sel->screen->info.has_ls_vgpr_init_bug,