mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-30 03:30:10 +01:00
radv: move nir_opt_algebraic loop for NGG culling earlier
Totals from 6913 (8.66% of 79825) affected shaders: (Navi21)
Instrs: 5373319 -> 5358717 (-0.27%); split: -0.30%, +0.03%
CodeSize: 27448536 -> 27345464 (-0.38%); split: -0.41%, +0.03%
SpillSGPRs: 982 -> 998 (+1.63%)
Latency: 22998827 -> 23011602 (+0.06%); split: -0.13%, +0.19%
InvThroughput: 4663749 -> 4664809 (+0.02%); split: -0.00%, +0.03%
VClause: 120845 -> 120461 (-0.32%); split: -0.49%, +0.17%
SClause: 119068 -> 116064 (-2.52%); split: -2.71%, +0.18%
Copies: 456590 -> 456450 (-0.03%); split: -0.19%, +0.16%
Branches: 145555 -> 145559 (+0.00%); split: -0.00%, +0.01%
PreSGPRs: 300465 -> 301154 (+0.23%); split: -0.01%, +0.24%
VALU: 3064127 -> 3064210 (+0.00%); split: -0.00%, +0.00%
SALU: 891257 -> 886368 (-0.55%); split: -0.71%, +0.16%
SMEM: 190500 -> 184624 (-3.08%); split: -3.11%, +0.02%

Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/36936>
This commit is contained in:
parent
3fe651f607
commit
c63c695149
3 changed files with 39 additions and 18 deletions
|
|
@ -433,6 +433,15 @@ radv_postprocess_nir(struct radv_device *device, const struct radv_graphics_stat
|
|||
if (constant_fold_for_push_const && stage->args.ac.inline_push_const_mask)
|
||||
NIR_PASS(_, stage->nir, nir_opt_constant_folding);
|
||||
|
||||
/* Optimize NIR before NGG culling */
|
||||
bool is_last_vgt_stage = radv_is_last_vgt_stage(stage);
|
||||
bool lowered_ngg = stage->info.is_ngg && is_last_vgt_stage;
|
||||
if (lowered_ngg && stage->nir->info.stage != MESA_SHADER_GEOMETRY && stage->info.has_ngg_culling)
|
||||
radv_optimize_nir_algebraic_early(stage->nir);
|
||||
|
||||
/* This has to be done after nir_opt_algebraic for best descriptor vectorization, but also before
|
||||
* NGG culling.
|
||||
*/
|
||||
NIR_PASS(_, stage->nir, radv_nir_apply_pipeline_layout, device, stage);
|
||||
|
||||
NIR_PASS(_, stage->nir, nir_lower_alu_width, opt_vectorize_callback, device);
|
||||
|
|
@ -466,9 +475,7 @@ radv_postprocess_nir(struct radv_device *device, const struct radv_graphics_stat
|
|||
}
|
||||
|
||||
/* Lower I/O intrinsics to memory instructions. */
|
||||
bool is_last_vgt_stage = radv_is_last_vgt_stage(stage);
|
||||
bool io_to_mem = radv_nir_lower_io_to_mem(device, stage);
|
||||
bool lowered_ngg = stage->info.is_ngg && is_last_vgt_stage;
|
||||
if (lowered_ngg) {
|
||||
radv_lower_ngg(device, stage, gfx_state);
|
||||
} else if (is_last_vgt_stage) {
|
||||
|
|
|
|||
|
|
@ -234,7 +234,7 @@ radv_optimize_nir(struct nir_shader *shader, bool optimize_conservatively)
|
|||
}
|
||||
|
||||
void
|
||||
radv_optimize_nir_algebraic(nir_shader *nir, bool opt_offsets, bool opt_mqsad, enum amd_gfx_level gfx_level)
|
||||
radv_optimize_nir_algebraic_early(nir_shader *nir)
|
||||
{
|
||||
bool more_algebraic = true;
|
||||
while (more_algebraic) {
|
||||
|
|
@ -258,21 +258,11 @@ radv_optimize_nir_algebraic(nir_shader *nir, bool opt_offsets, bool opt_mqsad, e
|
|||
NIR_PASS(_, nir, nir_opt_remove_phis);
|
||||
NIR_PASS(_, nir, nir_opt_dead_cf);
|
||||
}
|
||||
}
|
||||
|
||||
if (opt_offsets) {
|
||||
const nir_opt_offsets_options offset_options = {
|
||||
.uniform_max = 0,
|
||||
.buffer_max = ~0,
|
||||
.shared_max = UINT16_MAX,
|
||||
.shared_atomic_max = UINT16_MAX,
|
||||
.allow_offset_wrap_cb = ac_nir_allow_offset_wrap_cb,
|
||||
.cb_data = &gfx_level,
|
||||
};
|
||||
NIR_PASS(_, nir, nir_opt_offsets, &offset_options);
|
||||
}
|
||||
if (opt_mqsad)
|
||||
NIR_PASS(_, nir, nir_opt_mqsad);
|
||||
|
||||
void
|
||||
radv_optimize_nir_algebraic_late(nir_shader *nir)
|
||||
{
|
||||
/* Do late algebraic optimization to turn add(a,
|
||||
* neg(b)) back into subs, then the mandatory cleanup
|
||||
* after algebraic. Note that it may produce fnegs,
|
||||
|
|
@ -292,6 +282,28 @@ radv_optimize_nir_algebraic(nir_shader *nir, bool opt_offsets, bool opt_mqsad, e
|
|||
_mesa_set_destroy(skip, NULL);
|
||||
}
|
||||
|
||||
/* Run the complete algebraic optimization sequence on a shader: the early
 * algebraic step first, then the optional nir_opt_offsets and nir_opt_mqsad
 * passes, and finally the late algebraic step.
 *
 * nir:         shader to optimize; every pass below is applied to it.
 * opt_offsets: when true, run nir_opt_offsets with the limits set below.
 * opt_mqsad:   when true, run nir_opt_mqsad.
 * gfx_level:   passed by address as cb_data alongside
 *              ac_nir_allow_offset_wrap_cb; only used when opt_offsets is set.
 */
void
|
||||
radv_optimize_nir_algebraic(nir_shader *nir, bool opt_offsets, bool opt_mqsad, enum amd_gfx_level gfx_level)
|
||||
{
|
||||
radv_optimize_nir_algebraic_early(nir);
|
||||
|
||||
if (opt_offsets) {
|
||||
const nir_opt_offsets_options offset_options = {
|
||||
.uniform_max = 0,
|
||||
.buffer_max = ~0,
|
||||
.shared_max = UINT16_MAX,
|
||||
.shared_atomic_max = UINT16_MAX,
|
||||
.allow_offset_wrap_cb = ac_nir_allow_offset_wrap_cb,
|
||||
/* Points at the by-value parameter, so it is only valid during this call. */
.cb_data = &gfx_level,
|
||||
};
|
||||
NIR_PASS(_, nir, nir_opt_offsets, &offset_options);
|
||||
}
|
||||
if (opt_mqsad)
|
||||
NIR_PASS(_, nir, nir_opt_mqsad);
|
||||
|
||||
radv_optimize_nir_algebraic_late(nir);
|
||||
}
|
||||
|
||||
static void
|
||||
shared_var_info(const struct glsl_type *type, unsigned *size, unsigned *align)
|
||||
{
|
||||
|
|
@ -913,7 +925,7 @@ radv_lower_ngg(struct radv_device *device, struct radv_shader_stage *ngg_stage,
|
|||
assert(info->is_ngg);
|
||||
|
||||
if (info->has_ngg_culling)
|
||||
radv_optimize_nir_algebraic(nir, false, false, pdev->info.gfx_level);
|
||||
radv_optimize_nir_algebraic_late(nir);
|
||||
|
||||
options.num_vertices_per_primitive = num_vertices_per_prim;
|
||||
options.early_prim_export = info->has_ngg_early_prim_export;
|
||||
|
|
|
|||
|
|
@ -487,6 +487,8 @@ struct radv_shader_dma_submission {
|
|||
struct radv_shader_stage;
|
||||
|
||||
void radv_optimize_nir(struct nir_shader *shader, bool optimize_conservatively);
|
||||
void radv_optimize_nir_algebraic_early(nir_shader *shader);
|
||||
void radv_optimize_nir_algebraic_late(nir_shader *shader);
|
||||
void radv_optimize_nir_algebraic(nir_shader *shader, bool opt_offsets, bool opt_mqsad,
|
||||
enum amd_gfx_level gfx_level);
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue