diff --git a/src/amd/vulkan/radv_pipeline.c b/src/amd/vulkan/radv_pipeline.c
index a4ba103c91e..e4aa4a842f4 100644
--- a/src/amd/vulkan/radv_pipeline.c
+++ b/src/amd/vulkan/radv_pipeline.c
@@ -701,6 +701,35 @@ radv_postprocess_nir(struct radv_device *device, const struct radv_graphics_stat
       NIR_PASS(_, stage->nir, nir_opt_move, nir_move_comparisons);
    }
 
+   if (gfx_level >= GFX12) {
+      /* loadcnt */
+      NIR_PASS(_, stage->nir, nir_opt_move_reorder_loads,
+               nir_move_tex_load | nir_move_tex_load_fragment_mask |
+               nir_move_load_image | nir_move_load_image_fragment_mask |
+               nir_move_load_global | nir_move_load_ubo | nir_move_load_ssbo |
+               nir_move_load_buffer_amd | nir_move_only_divergent);
+
+      /* samplecnt (these flags are unaffected by nir_move_only_divergent) */
+      NIR_PASS(_, stage->nir, nir_opt_move_reorder_loads,
+               nir_move_tex_sample | nir_move_tex_lod);
+   } else {
+      /* vmcnt */
+      NIR_PASS(_, stage->nir, nir_opt_move_reorder_loads,
+               nir_move_tex_sample | nir_move_tex_lod |
+               nir_move_tex_load | nir_move_tex_load_fragment_mask |
+               nir_move_load_image | nir_move_load_image_fragment_mask |
+               nir_move_load_global | nir_move_load_ubo | nir_move_load_ssbo |
+               nir_move_load_buffer_amd | nir_move_only_divergent);
+   }
+
+   /* lgkmcnt/kmcnt (even though SMEM can finish out of order, putting the loads in the optimal
+    * order can help the backend scheduler)
+    */
+   NIR_PASS(_, stage->nir, nir_opt_move_reorder_loads,
+            nir_move_load_global | nir_move_load_ubo | nir_move_load_ssbo | nir_move_only_convergent);
+
+   NIR_PASS(_, stage->nir, nir_opt_group_loads, nir_group_same_resource_only, 16);
+
    stage->info.nir_shared_size = stage->nir->info.shared_size;
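
For context (not part of the patch): the reason to reorder and group loads per wait counter is so that one wait can cover several loads that are already in flight, instead of the shader stalling on each load individually. A rough, standalone sketch of that effect, with made-up latency numbers and no RADV/NIR code:

/* Standalone illustration only -- not RADV code.  The latency and ALU costs
 * are made-up numbers; the point is that grouped independent loads overlap
 * their latency, so a single wait covers all of them. */
#include <stdio.h>

#define LOAD_LATENCY 300u /* illustrative memory latency in cycles */
#define ALU_COST       4u /* illustrative cost of consuming one result */
#define NUM_LOADS      4u

int
main(void)
{
   /* Interleaved pattern: load, wait, consume, repeat.
    * Every load pays the full latency. */
   unsigned interleaved = NUM_LOADS * (LOAD_LATENCY + ALU_COST);

   /* Grouped pattern: issue all loads back to back, wait once, then consume.
    * The loads are in flight simultaneously, so roughly one latency is paid. */
   unsigned grouped = LOAD_LATENCY + NUM_LOADS * ALU_COST;

   printf("interleaved: %u cycles\n", interleaved);
   printf("grouped:     %u cycles\n", grouped);
   return 0;
}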