radv: Don't change LDS_SIZE for NGG culling shaders.

There was no measurable perf benefit from this optimization,
and it made the code messy and difficult to refactor.

Signed-off-by: Timur Kristóf <timur.kristof@gmail.com>
Reviewed-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
Reviewed-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/20980>
This commit is contained in:
Timur Kristóf 2023-01-30 02:45:05 +01:00 committed by Marge Bot
parent ab87b19088
commit 8457dfec47
3 changed files with 0 additions and 48 deletions

View file

@@ -1874,19 +1874,6 @@ radv_emit_graphics_pipeline(struct radv_cmd_buffer *cmd_buffer)
radeon_emit_array(cmd_buffer->cs, pipeline->base.cs.buf, pipeline->base.cs.cdw);
if (pipeline->has_ngg_culling &&
pipeline->last_vgt_api_stage != MESA_SHADER_GEOMETRY &&
!cmd_buffer->state.last_nggc_settings) {
/* The already emitted RSRC2 contains the LDS required for NGG culling.
* Culling is currently disabled, so re-emit RSRC2 to reduce LDS usage.
* API GS always needs LDS, so this isn't useful there.
*/
struct radv_shader *v = pipeline->base.shaders[pipeline->last_vgt_api_stage];
radeon_set_sh_reg(cmd_buffer->cs, R_00B22C_SPI_SHADER_PGM_RSRC2_GS,
(v->config.rsrc2 & C_00B22C_LDS_SIZE) |
S_00B22C_LDS_SIZE(v->info.num_lds_blocks_when_not_culling));
}
if (!cmd_buffer->state.emitted_graphics_pipeline ||
cmd_buffer->state.emitted_graphics_pipeline->base.ctx_cs.cdw != pipeline->base.ctx_cs.cdw ||
cmd_buffer->state.emitted_graphics_pipeline->base.ctx_cs_hash != pipeline->base.ctx_cs_hash ||
@@ -8569,27 +8556,6 @@ radv_emit_ngg_culling_state(struct radv_cmd_buffer *cmd_buffer, const struct rad
radeon_set_sh_reg(cmd_buffer->cs, base_reg + nggc_sgpr_idx * 4, nggc_settings);
}
/* These only need to be emitted when culling is turned on or off,
* but not when it stays on and just some settings change.
*/
if (!!cmd_buffer->state.last_nggc_settings != !!nggc_settings) {
uint32_t rsrc2 = v->config.rsrc2;
if (!nggc_settings) {
/* Allocate less LDS when culling is disabled. (But GS always needs it.) */
if (stage != MESA_SHADER_GEOMETRY)
rsrc2 = (rsrc2 & C_00B22C_LDS_SIZE) | S_00B22C_LDS_SIZE(v->info.num_lds_blocks_when_not_culling);
}
/* When the pipeline is dirty and not yet emitted, don't write it here
* because radv_emit_graphics_pipeline will overwrite this register.
*/
if (!(cmd_buffer->state.dirty & RADV_CMD_DIRTY_PIPELINE) ||
cmd_buffer->state.emitted_graphics_pipeline == pipeline) {
radeon_set_sh_reg(cmd_buffer->cs, R_00B22C_SPI_SHADER_PGM_RSRC2_GS, rsrc2);
}
}
cmd_buffer->state.last_nggc_settings = nggc_settings;
cmd_buffer->state.last_nggc_settings_sgpr_idx = nggc_sgpr_idx;
}

View file

@@ -253,7 +253,6 @@ struct radv_shader_info {
bool has_ngg_early_prim_export;
bool has_ngg_prim_query;
bool has_ngg_xfb_query;
uint32_t num_lds_blocks_when_not_culling;
uint32_t num_tess_patches;
uint32_t esgs_itemsize; /* Only for VS or TES as ES */
struct radv_vs_output_info outinfo;

View file

@@ -1284,19 +1284,6 @@ radv_determine_ngg_settings(struct radv_device *device, struct radv_pipeline_sta
nir_function_impl *impl = nir_shader_get_entrypoint(es_stage->nir);
es_stage->info.has_ngg_early_prim_export = exec_list_is_singular(&impl->body);
/* Invocations that process an input vertex */
const struct gfx10_ngg_info *ngg_info = &es_stage->info.ngg_info;
unsigned max_vtx_in = MIN2(256, ngg_info->hw_max_esverts);
unsigned lds_bytes_if_culling_off = 0;
/* We need LDS space when VS needs to export the primitive ID. */
if (es_stage->stage == MESA_SHADER_VERTEX && es_stage->info.outinfo.export_prim_id)
lds_bytes_if_culling_off = max_vtx_in * 4u;
es_stage->info.num_lds_blocks_when_not_culling =
DIV_ROUND_UP(lds_bytes_if_culling_off,
device->physical_device->rad_info.lds_encode_granularity);
/* NGG passthrough mode should be disabled when culling and when the vertex shader
* exports the primitive ID.
*/