ac/nir/lower_ngg: return LDS size for NGG VS and TES from the pass

instead of computing it separately. This is better because
ac_nir_lower_ngg_nogs knows the final LDS size anyway, and it will be
easier to modify the size calculation this way.

Reviewed-by: Timur Kristóf <timur.kristof@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/35351>
This commit is contained in:
Marek Olšák 2025-05-28 08:30:31 -04:00 committed by Marge Bot
parent 7fe603ad82
commit d79f28e9b3
9 changed files with 37 additions and 60 deletions

View file

@ -306,16 +306,6 @@ uint32_t ac_compute_scratch_wavesize(const struct radeon_info *info, uint32_t by
void ac_get_scratch_tmpring_size(const struct radeon_info *info, unsigned num_scratch_waves,
unsigned bytes_per_wave, uint32_t *tmpring_size);
unsigned
ac_ngg_nogs_get_pervertex_lds_size(gl_shader_stage stage,
unsigned shader_num_outputs,
bool streamout_enabled,
bool export_prim_id,
bool has_user_edgeflags,
bool can_cull,
bool uses_instance_id,
bool uses_primitive_id);
unsigned
ac_ngg_get_scratch_lds_size(gl_shader_stage stage,
unsigned workgroup_size,

View file

@ -208,7 +208,8 @@ typedef struct {
} ac_nir_lower_ngg_options;
bool
ac_nir_lower_ngg_nogs(nir_shader *shader, const ac_nir_lower_ngg_options *options);
ac_nir_lower_ngg_nogs(nir_shader *shader, const ac_nir_lower_ngg_options *options,
uint32_t *out_lds_vertex_size);
bool
ac_nir_lower_ngg_gs(nir_shader *shader, const ac_nir_lower_ngg_options *options);

View file

@ -1547,8 +1547,31 @@ ngg_nogs_gather_outputs(nir_builder *b, struct exec_list *cf_list, lower_ngg_nog
}
}
static unsigned
ac_ngg_nogs_get_pervertex_lds_size(gl_shader_stage stage,
unsigned shader_num_outputs,
bool streamout_enabled,
bool export_prim_id,
bool has_user_edgeflags,
bool can_cull,
bool uses_instance_id,
bool uses_tess_primitive_id)
{
/* for culling time lds layout only */
unsigned culling_pervertex_lds_bytes = can_cull ?
ngg_nogs_get_culling_pervertex_lds_size(
stage, uses_instance_id, uses_tess_primitive_id, NULL) : 0;
unsigned pervertex_lds_bytes =
ngg_nogs_get_pervertex_lds_size(stage, shader_num_outputs, streamout_enabled,
export_prim_id, has_user_edgeflags);
return MAX2(culling_pervertex_lds_bytes, pervertex_lds_bytes);
}
bool
ac_nir_lower_ngg_nogs(nir_shader *shader, const ac_nir_lower_ngg_options *options)
ac_nir_lower_ngg_nogs(nir_shader *shader, const ac_nir_lower_ngg_options *options,
uint32_t *out_lds_vertex_size)
{
nir_function_impl *impl = nir_shader_get_entrypoint(shader);
assert(impl);
@ -1858,31 +1881,14 @@ ac_nir_lower_ngg_nogs(nir_shader *shader, const ac_nir_lower_ngg_options *option
NIR_PASS(progress, shader, nir_opt_dead_cf);
} while (progress);
*out_lds_vertex_size =
ac_ngg_nogs_get_pervertex_lds_size(shader->info.stage, shader->num_outputs, state.streamout_enabled,
options->export_primitive_id, state.has_user_edgeflags,
options->can_cull, state.deferred.uses_instance_id,
state.deferred.uses_tess_primitive_id);
return true;
}
unsigned
ac_ngg_nogs_get_pervertex_lds_size(gl_shader_stage stage,
unsigned shader_num_outputs,
bool streamout_enabled,
bool export_prim_id,
bool has_user_edgeflags,
bool can_cull,
bool uses_instance_id,
bool uses_primitive_id)
{
/* for culling time lds layout only */
unsigned culling_pervertex_lds_bytes = can_cull ?
ngg_nogs_get_culling_pervertex_lds_size(
stage, uses_instance_id, uses_primitive_id, NULL) : 0;
unsigned pervertex_lds_bytes =
ngg_nogs_get_pervertex_lds_size(stage, shader_num_outputs, streamout_enabled,
export_prim_id, has_user_edgeflags);
return MAX2(culling_pervertex_lds_bytes, pervertex_lds_bytes);
}
unsigned
ac_ngg_get_scratch_lds_size(gl_shader_stage stage,
unsigned workgroup_size,

View file

@ -814,7 +814,7 @@ radv_lower_ngg(struct radv_device *device, struct radv_shader_stage *ngg_stage,
options.export_primitive_id_per_prim = info->outinfo.export_prim_id_per_primitive;
options.instance_rate_inputs = gfx_state->vi.instance_rate_inputs << VERT_ATTRIB_GENERIC0;
NIR_PASS(_, nir, ac_nir_lower_ngg_nogs, &options);
NIR_PASS(_, nir, ac_nir_lower_ngg_nogs, &options, &ngg_stage->info.ngg_lds_vertex_size);
} else if (nir->info.stage == MESA_SHADER_GEOMETRY) {
assert(info->is_ngg);

View file

@ -1424,7 +1424,6 @@ static unsigned
gfx10_get_ngg_scratch_lds_base(const struct radv_device *device, const struct radv_shader_info *es_info,
const struct radv_shader_info *gs_info, const struct gfx10_ngg_info *ngg_info)
{
const struct radv_physical_device *pdev = radv_device_physical(device);
uint32_t scratch_lds_base;
if (gs_info) {
@ -1433,17 +1432,8 @@ gfx10_get_ngg_scratch_lds_base(const struct radv_device *device, const struct ra
scratch_lds_base = ALIGN(esgs_ring_lds_bytes + gs_total_out_vtx_bytes, 8u /* for the repacking code */);
} else {
const bool uses_instanceid = es_info->vs.needs_instance_id;
const bool uses_primitive_id = es_info->uses_prim_id;
const bool streamout_enabled = es_info->so.enabled_stream_buffers_mask && pdev->use_ngg_streamout;
const uint32_t num_outputs =
es_info->stage == MESA_SHADER_VERTEX ? es_info->vs.num_outputs : es_info->tes.num_outputs;
unsigned pervertex_lds_bytes = ac_ngg_nogs_get_pervertex_lds_size(
es_info->stage, num_outputs, streamout_enabled, es_info->outinfo.export_prim_id, false, /* user edge flag */
es_info->has_ngg_culling, uses_instanceid, uses_primitive_id);
assert(ngg_info->hw_max_esverts <= 256);
unsigned total_es_lds_bytes = pervertex_lds_bytes * ngg_info->hw_max_esverts;
unsigned total_es_lds_bytes = es_info->ngg_lds_vertex_size * ngg_info->hw_max_esverts;
scratch_lds_base = ALIGN(total_es_lds_bytes, 8u);
}

View file

@ -100,6 +100,7 @@ struct radv_shader_info {
bool has_xfb_query;
uint32_t num_tess_patches;
uint32_t esgs_itemsize; /* Only for VS or TES as ES */
uint32_t ngg_lds_vertex_size;
struct radv_vs_output_info outinfo;
unsigned workgroup_size;
bool force_vrs_per_vertex;

View file

@ -110,19 +110,7 @@ retry_select_mode:
}
} else {
/* VS and TES. */
bool uses_primitive_id = gs_sel->info.uses_primid;
if (gs_stage == MESA_SHADER_TESS_EVAL)
uses_primitive_id |= shader->key.ge.mono.u.vs_export_prim_id;
esvert_lds_size = ac_ngg_nogs_get_pervertex_lds_size(
gs_stage, gs_sel->info.num_outputs,
shader->info.num_streamout_vec4s != 0,
shader->key.ge.mono.u.vs_export_prim_id,
gfx10_ngg_writes_user_edgeflags(shader),
si_shader_culling_enabled(shader),
shader->info.uses_instance_id,
uses_primitive_id) / 4;
esvert_lds_size = shader->info.ngg_lds_vertex_size / 4;
}
unsigned max_gsprims = max_gsprims_base;

View file

@ -1172,7 +1172,7 @@ static void si_lower_ngg(struct si_shader *shader, nir_shader *nir,
options.instance_rate_inputs = instance_rate_inputs;
options.user_clip_plane_enable_mask = clip_plane_enable;
NIR_PASS_V(nir, ac_nir_lower_ngg_nogs, &options);
NIR_PASS_V(nir, ac_nir_lower_ngg_nogs, &options, &shader->info.ngg_lds_vertex_size);
} else {
assert(nir->info.stage == MESA_SHADER_GEOMETRY);

View file

@ -240,6 +240,7 @@ struct si_shader_variant_info {
uint8_t num_streamout_vec4s;
unsigned private_mem_vgprs;
unsigned max_simd_waves;
uint32_t ngg_lds_vertex_size;
};
#endif