diff --git a/src/amd/vulkan/radv_pipeline.c b/src/amd/vulkan/radv_pipeline.c
index 884eff95f8d..204848f6e1d 100644
--- a/src/amd/vulkan/radv_pipeline.c
+++ b/src/amd/vulkan/radv_pipeline.c
@@ -433,6 +433,15 @@ radv_postprocess_nir(struct radv_device *device, const struct radv_graphics_stat
    if (constant_fold_for_push_const && stage->args.ac.inline_push_const_mask)
       NIR_PASS(_, stage->nir, nir_opt_constant_folding);
 
+   /* Optimize NIR before NGG culling */
+   bool is_last_vgt_stage = radv_is_last_vgt_stage(stage);
+   bool lowered_ngg = stage->info.is_ngg && is_last_vgt_stage;
+   if (lowered_ngg && stage->nir->info.stage != MESA_SHADER_GEOMETRY && stage->info.has_ngg_culling)
+      radv_optimize_nir_algebraic_early(stage->nir);
+
+   /* This has to be done after nir_opt_algebraic for best descriptor vectorization, but also before
+    * NGG culling.
+    */
    NIR_PASS(_, stage->nir, radv_nir_apply_pipeline_layout, device, stage);
 
    NIR_PASS(_, stage->nir, nir_lower_alu_width, opt_vectorize_callback, device);
@@ -466,9 +475,7 @@ radv_postprocess_nir(struct radv_device *device, const struct radv_graphics_stat
    }
 
    /* Lower I/O intrinsics to memory instructions. */
-   bool is_last_vgt_stage = radv_is_last_vgt_stage(stage);
    bool io_to_mem = radv_nir_lower_io_to_mem(device, stage);
-   bool lowered_ngg = stage->info.is_ngg && is_last_vgt_stage;
    if (lowered_ngg) {
       radv_lower_ngg(device, stage, gfx_state);
    } else if (is_last_vgt_stage) {
diff --git a/src/amd/vulkan/radv_shader.c b/src/amd/vulkan/radv_shader.c
index a702997fb26..4c7373c0266 100644
--- a/src/amd/vulkan/radv_shader.c
+++ b/src/amd/vulkan/radv_shader.c
@@ -234,7 +234,7 @@ radv_optimize_nir(struct nir_shader *shader, bool optimize_conservatively)
 }
 
 void
-radv_optimize_nir_algebraic(nir_shader *nir, bool opt_offsets, bool opt_mqsad, enum amd_gfx_level gfx_level)
+radv_optimize_nir_algebraic_early(nir_shader *nir)
 {
    bool more_algebraic = true;
    while (more_algebraic) {
@@ -258,21 +258,11 @@ radv_optimize_nir_algebraic(nir_shader *nir, bool opt_offsets, bool opt_mqsad, e
       NIR_PASS(_, nir, nir_opt_remove_phis);
       NIR_PASS(_, nir, nir_opt_dead_cf);
    }
+}
 
-   if (opt_offsets) {
-      const nir_opt_offsets_options offset_options = {
-         .uniform_max = 0,
-         .buffer_max = ~0,
-         .shared_max = UINT16_MAX,
-         .shared_atomic_max = UINT16_MAX,
-         .allow_offset_wrap_cb = ac_nir_allow_offset_wrap_cb,
-         .cb_data = &gfx_level,
-      };
-      NIR_PASS(_, nir, nir_opt_offsets, &offset_options);
-   }
-   if (opt_mqsad)
-      NIR_PASS(_, nir, nir_opt_mqsad);
-
+void
+radv_optimize_nir_algebraic_late(nir_shader *nir)
+{
    /* Do late algebraic optimization to turn add(a,
    * neg(b)) back into subs, then the mandatory cleanup
    * after algebraic.  Note that it may produce fnegs,
@@ -292,6 +282,28 @@ radv_optimize_nir_algebraic(nir_shader *nir, bool opt_offsets, bool opt_mqsad, e
    _mesa_set_destroy(skip, NULL);
 }
 
+void
+radv_optimize_nir_algebraic(nir_shader *nir, bool opt_offsets, bool opt_mqsad, enum amd_gfx_level gfx_level)
+{
+   radv_optimize_nir_algebraic_early(nir);
+
+   if (opt_offsets) {
+      const nir_opt_offsets_options offset_options = {
+         .uniform_max = 0,
+         .buffer_max = ~0,
+         .shared_max = UINT16_MAX,
+         .shared_atomic_max = UINT16_MAX,
+         .allow_offset_wrap_cb = ac_nir_allow_offset_wrap_cb,
+         .cb_data = &gfx_level,
+      };
+      NIR_PASS(_, nir, nir_opt_offsets, &offset_options);
+   }
+   if (opt_mqsad)
+      NIR_PASS(_, nir, nir_opt_mqsad);
+
+   radv_optimize_nir_algebraic_late(nir);
+}
+
 static void
 shared_var_info(const struct glsl_type *type, unsigned *size, unsigned *align)
 {
@@ -913,7 +925,7 @@ radv_lower_ngg(struct radv_device *device, struct radv_shader_stage *ngg_stage,
    assert(info->is_ngg);
 
    if (info->has_ngg_culling)
-      radv_optimize_nir_algebraic(nir, false, false, pdev->info.gfx_level);
+      radv_optimize_nir_algebraic_late(nir);
 
    options.num_vertices_per_primitive = num_vertices_per_prim;
    options.early_prim_export = info->has_ngg_early_prim_export;
diff --git a/src/amd/vulkan/radv_shader.h b/src/amd/vulkan/radv_shader.h
index dacc6d9cc4d..c25a3101f04 100644
--- a/src/amd/vulkan/radv_shader.h
+++ b/src/amd/vulkan/radv_shader.h
@@ -487,6 +487,8 @@ struct radv_shader_dma_submission {
 struct radv_shader_stage;
 
 void radv_optimize_nir(struct nir_shader *shader, bool optimize_conservatively);
+void radv_optimize_nir_algebraic_early(nir_shader *shader);
+void radv_optimize_nir_algebraic_late(nir_shader *shader);
 void radv_optimize_nir_algebraic(nir_shader *shader, bool opt_offsets, bool opt_mqsad, enum amd_gfx_level gfx_level);
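
For context, a rough sketch of the pass ordering this split enables for a last-VGT, non-GS stage with NGG culling. This is not driver code, only a paraphrase of the hunks above; every identifier comes from the diff, the surrounding control flow is elided.

/* In radv_postprocess_nir(): run the early algebraic cleanup first, then
 * descriptor lowering, then NGG lowering (sketch, not the actual function). */
if (lowered_ngg && stage->nir->info.stage != MESA_SHADER_GEOMETRY && stage->info.has_ngg_culling)
   radv_optimize_nir_algebraic_early(stage->nir);
NIR_PASS(_, stage->nir, radv_nir_apply_pipeline_layout, device, stage);
/* ... */
radv_lower_ngg(device, stage, gfx_state);

/* In radv_lower_ngg(): the culling path now only reruns the late half,
 * since the early half already ran before descriptor lowering. */
if (info->has_ngg_culling)
   radv_optimize_nir_algebraic_late(nir);

The combined radv_optimize_nir_algebraic() entry point keeps its previous behavior (early phase, then nir_opt_offsets/nir_opt_mqsad, then the late phase), so existing callers should see the same pass sequence as before.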