radv: move nir_opt_algebraic loop for NGG culling earlier

Totals from 6913 (8.66% of 79825) affected shaders: (Navi21)
Instrs: 5373319 -> 5358717 (-0.27%); split: -0.30%, +0.03%
CodeSize: 27448536 -> 27345464 (-0.38%); split: -0.41%, +0.03%
SpillSGPRs: 982 -> 998 (+1.63%)
Latency: 22998827 -> 23011602 (+0.06%); split: -0.13%, +0.19%
InvThroughput: 4663749 -> 4664809 (+0.02%); split: -0.00%, +0.03%
VClause: 120845 -> 120461 (-0.32%); split: -0.49%, +0.17%
SClause: 119068 -> 116064 (-2.52%); split: -2.71%, +0.18%
Copies: 456590 -> 456450 (-0.03%); split: -0.19%, +0.16%
Branches: 145555 -> 145559 (+0.00%); split: -0.00%, +0.01%
PreSGPRs: 300465 -> 301154 (+0.23%); split: -0.01%, +0.24%
VALU: 3064127 -> 3064210 (+0.00%); split: -0.00%, +0.00%
SALU: 891257 -> 886368 (-0.55%); split: -0.71%, +0.16%
SMEM: 190500 -> 184624 (-3.08%); split: -3.11%, +0.02%
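How to read these lines: the main percentage is the net change across affected shaders, and "split" separates improvements from regressions. For example, for Instrs: 5358717 - 5373319 = -14602, and -14602 / 5373319 ≈ -0.27%, composed of -0.30% from shaders that shrank and +0.03% from shaders that grew.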

Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/36936>
Author: Rhys Perry, 2025-08-29 16:14:17 +01:00, committed by Marge Bot
parent 3fe651f607
commit c63c695149
3 changed files with 39 additions and 18 deletions
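At a high level, the commit splits radv_optimize_nir_algebraic() into an early half (the nir_opt_algebraic fixed-point loop) and a late half, and runs the early half before descriptor lowering and NGG culling for shaders that will use culling. A condensed sketch of the resulting pass order, reconstructed from the hunks below (everything else in radv_postprocess_nir() is elided):

   /* radv_postprocess_nir(), condensed:
    *
    *    nir_opt_constant_folding
    *    radv_optimize_nir_algebraic_early   <- moved up; NGG-culling stages only
    *    radv_nir_apply_pipeline_layout      <- descriptor vectorization now sees
    *                                           algebraically-optimized NIR
    *    ...
    *    radv_lower_ngg
    *       radv_optimize_nir_algebraic_late <- only the late fixups remain here
    */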


@@ -433,6 +433,15 @@ radv_postprocess_nir(struct radv_device *device, const struct radv_graphics_stat
    if (constant_fold_for_push_const && stage->args.ac.inline_push_const_mask)
       NIR_PASS(_, stage->nir, nir_opt_constant_folding);
 
+   /* Optimize NIR before NGG culling */
+   bool is_last_vgt_stage = radv_is_last_vgt_stage(stage);
+   bool lowered_ngg = stage->info.is_ngg && is_last_vgt_stage;
+   if (lowered_ngg && stage->nir->info.stage != MESA_SHADER_GEOMETRY && stage->info.has_ngg_culling)
+      radv_optimize_nir_algebraic_early(stage->nir);
+
+   /* This has to be done after nir_opt_algebraic for best descriptor vectorization, but also before
+    * NGG culling.
+    */
    NIR_PASS(_, stage->nir, radv_nir_apply_pipeline_layout, device, stage);
 
    NIR_PASS(_, stage->nir, nir_lower_alu_width, opt_vectorize_callback, device);
@@ -466,9 +475,7 @@
    }
 
    /* Lower I/O intrinsics to memory instructions. */
-   bool is_last_vgt_stage = radv_is_last_vgt_stage(stage);
    bool io_to_mem = radv_nir_lower_io_to_mem(device, stage);
-   bool lowered_ngg = stage->info.is_ngg && is_last_vgt_stage;
    if (lowered_ngg) {
       radv_lower_ngg(device, stage, gfx_state);
    } else if (is_last_vgt_stage) {


@@ -234,7 +234,7 @@ radv_optimize_nir(struct nir_shader *shader, bool optimize_conservatively)
 }
 
 void
-radv_optimize_nir_algebraic(nir_shader *nir, bool opt_offsets, bool opt_mqsad, enum amd_gfx_level gfx_level)
+radv_optimize_nir_algebraic_early(nir_shader *nir)
 {
    bool more_algebraic = true;
    while (more_algebraic) {
@@ -258,21 +258,11 @@ radv_optimize_nir_algebraic(nir_shader *nir, bool opt_offsets, bool opt_mqsad, e
       NIR_PASS(_, nir, nir_opt_remove_phis);
       NIR_PASS(_, nir, nir_opt_dead_cf);
    }
+}
 
-   if (opt_offsets) {
-      const nir_opt_offsets_options offset_options = {
-         .uniform_max = 0,
-         .buffer_max = ~0,
-         .shared_max = UINT16_MAX,
-         .shared_atomic_max = UINT16_MAX,
-         .allow_offset_wrap_cb = ac_nir_allow_offset_wrap_cb,
-         .cb_data = &gfx_level,
-      };
-      NIR_PASS(_, nir, nir_opt_offsets, &offset_options);
-   }
-   if (opt_mqsad)
-      NIR_PASS(_, nir, nir_opt_mqsad);
-
+void
+radv_optimize_nir_algebraic_late(nir_shader *nir)
+{
    /* Do late algebraic optimization to turn add(a,
     * neg(b)) back into subs, then the mandatory cleanup
     * after algebraic. Note that it may produce fnegs,
@@ -292,6 +282,28 @@ radv_optimize_nir_algebraic(nir_shader *nir, bool opt_offsets, bool opt_mqsad, e
    _mesa_set_destroy(skip, NULL);
 }
 
+void
+radv_optimize_nir_algebraic(nir_shader *nir, bool opt_offsets, bool opt_mqsad, enum amd_gfx_level gfx_level)
+{
+   radv_optimize_nir_algebraic_early(nir);
+
+   if (opt_offsets) {
+      const nir_opt_offsets_options offset_options = {
+         .uniform_max = 0,
+         .buffer_max = ~0,
+         .shared_max = UINT16_MAX,
+         .shared_atomic_max = UINT16_MAX,
+         .allow_offset_wrap_cb = ac_nir_allow_offset_wrap_cb,
+         .cb_data = &gfx_level,
+      };
+      NIR_PASS(_, nir, nir_opt_offsets, &offset_options);
+   }
+   if (opt_mqsad)
+      NIR_PASS(_, nir, nir_opt_mqsad);
+
+   radv_optimize_nir_algebraic_late(nir);
+}
+
 static void
 shared_var_info(const struct glsl_type *type, unsigned *size, unsigned *align)
 {
@@ -913,7 +925,7 @@ radv_lower_ngg(struct radv_device *device, struct radv_shader_stage *ngg_stage,
    assert(info->is_ngg);
 
    if (info->has_ngg_culling)
-      radv_optimize_nir_algebraic(nir, false, false, pdev->info.gfx_level);
+      radv_optimize_nir_algebraic_late(nir);
 
    options.num_vertices_per_primitive = num_vertices_per_prim;
    options.early_prim_export = info->has_ngg_early_prim_export;
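For context, the "more_algebraic" loop that becomes radv_optimize_nir_algebraic_early() reruns NIR cleanup passes until nir_opt_algebraic stops reporting progress. The hunks above elide the loop body, so the exact pass list below is an assumption (only nir_opt_remove_phis and nir_opt_dead_cf are visible in the diff); a rough sketch of its shape:

   bool more_algebraic = true;
   while (more_algebraic) {
      more_algebraic = false;
      /* Canonicalize so nir_opt_algebraic can match patterns (assumed passes). */
      NIR_PASS(_, nir, nir_copy_prop);
      NIR_PASS(_, nir, nir_opt_dce);
      NIR_PASS(_, nir, nir_opt_constant_folding);
      /* NIR_PASS records the pass's progress in more_algebraic, so the loop
       * repeats while algebraic rewrites keep firing. */
      NIR_PASS(more_algebraic, nir, nir_opt_algebraic);
      /* Cleanup visible in the hunk above. */
      NIR_PASS(_, nir, nir_opt_remove_phis);
      NIR_PASS(_, nir, nir_opt_dead_cf);
   }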


@@ -487,6 +487,8 @@ struct radv_shader_dma_submission {
 struct radv_shader_stage;
 
 void radv_optimize_nir(struct nir_shader *shader, bool optimize_conservatively);
+void radv_optimize_nir_algebraic_early(nir_shader *shader);
+void radv_optimize_nir_algebraic_late(nir_shader *shader);
 void radv_optimize_nir_algebraic(nir_shader *shader, bool opt_offsets, bool opt_mqsad,
                                  enum amd_gfx_level gfx_level);