mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-05 20:28:04 +02:00
ac/nir/lower_ngg: return LDS size for NGG VS and TES from the pass
instead of computing it separately. This is better because ac_nir_lower_ngg_nogs knows the final LDS size anyway, and it will be easier to modify the size calculation this way. Reviewed-by: Timur Kristóf <timur.kristof@gmail.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/35351>
This commit is contained in:
parent
7fe603ad82
commit
d79f28e9b3
9 changed files with 37 additions and 60 deletions
|
|
@ -306,16 +306,6 @@ uint32_t ac_compute_scratch_wavesize(const struct radeon_info *info, uint32_t by
|
|||
void ac_get_scratch_tmpring_size(const struct radeon_info *info, unsigned num_scratch_waves,
|
||||
unsigned bytes_per_wave, uint32_t *tmpring_size);
|
||||
|
||||
unsigned
|
||||
ac_ngg_nogs_get_pervertex_lds_size(gl_shader_stage stage,
|
||||
unsigned shader_num_outputs,
|
||||
bool streamout_enabled,
|
||||
bool export_prim_id,
|
||||
bool has_user_edgeflags,
|
||||
bool can_cull,
|
||||
bool uses_instance_id,
|
||||
bool uses_primitive_id);
|
||||
|
||||
unsigned
|
||||
ac_ngg_get_scratch_lds_size(gl_shader_stage stage,
|
||||
unsigned workgroup_size,
|
||||
|
|
|
|||
|
|
@ -208,7 +208,8 @@ typedef struct {
|
|||
} ac_nir_lower_ngg_options;
|
||||
|
||||
bool
|
||||
ac_nir_lower_ngg_nogs(nir_shader *shader, const ac_nir_lower_ngg_options *options);
|
||||
ac_nir_lower_ngg_nogs(nir_shader *shader, const ac_nir_lower_ngg_options *options,
|
||||
uint32_t *out_lds_vertex_size);
|
||||
|
||||
bool
|
||||
ac_nir_lower_ngg_gs(nir_shader *shader, const ac_nir_lower_ngg_options *options);
|
||||
|
|
|
|||
|
|
@ -1547,8 +1547,31 @@ ngg_nogs_gather_outputs(nir_builder *b, struct exec_list *cf_list, lower_ngg_nog
|
|||
}
|
||||
}
|
||||
|
||||
static unsigned
|
||||
ac_ngg_nogs_get_pervertex_lds_size(gl_shader_stage stage,
|
||||
unsigned shader_num_outputs,
|
||||
bool streamout_enabled,
|
||||
bool export_prim_id,
|
||||
bool has_user_edgeflags,
|
||||
bool can_cull,
|
||||
bool uses_instance_id,
|
||||
bool uses_tess_primitive_id)
|
||||
{
|
||||
/* for culling time lds layout only */
|
||||
unsigned culling_pervertex_lds_bytes = can_cull ?
|
||||
ngg_nogs_get_culling_pervertex_lds_size(
|
||||
stage, uses_instance_id, uses_tess_primitive_id, NULL) : 0;
|
||||
|
||||
unsigned pervertex_lds_bytes =
|
||||
ngg_nogs_get_pervertex_lds_size(stage, shader_num_outputs, streamout_enabled,
|
||||
export_prim_id, has_user_edgeflags);
|
||||
|
||||
return MAX2(culling_pervertex_lds_bytes, pervertex_lds_bytes);
|
||||
}
|
||||
|
||||
bool
|
||||
ac_nir_lower_ngg_nogs(nir_shader *shader, const ac_nir_lower_ngg_options *options)
|
||||
ac_nir_lower_ngg_nogs(nir_shader *shader, const ac_nir_lower_ngg_options *options,
|
||||
uint32_t *out_lds_vertex_size)
|
||||
{
|
||||
nir_function_impl *impl = nir_shader_get_entrypoint(shader);
|
||||
assert(impl);
|
||||
|
|
@ -1858,31 +1881,14 @@ ac_nir_lower_ngg_nogs(nir_shader *shader, const ac_nir_lower_ngg_options *option
|
|||
NIR_PASS(progress, shader, nir_opt_dead_cf);
|
||||
} while (progress);
|
||||
|
||||
*out_lds_vertex_size =
|
||||
ac_ngg_nogs_get_pervertex_lds_size(shader->info.stage, shader->num_outputs, state.streamout_enabled,
|
||||
options->export_primitive_id, state.has_user_edgeflags,
|
||||
options->can_cull, state.deferred.uses_instance_id,
|
||||
state.deferred.uses_tess_primitive_id);
|
||||
return true;
|
||||
}
|
||||
|
||||
unsigned
|
||||
ac_ngg_nogs_get_pervertex_lds_size(gl_shader_stage stage,
|
||||
unsigned shader_num_outputs,
|
||||
bool streamout_enabled,
|
||||
bool export_prim_id,
|
||||
bool has_user_edgeflags,
|
||||
bool can_cull,
|
||||
bool uses_instance_id,
|
||||
bool uses_primitive_id)
|
||||
{
|
||||
/* for culling time lds layout only */
|
||||
unsigned culling_pervertex_lds_bytes = can_cull ?
|
||||
ngg_nogs_get_culling_pervertex_lds_size(
|
||||
stage, uses_instance_id, uses_primitive_id, NULL) : 0;
|
||||
|
||||
unsigned pervertex_lds_bytes =
|
||||
ngg_nogs_get_pervertex_lds_size(stage, shader_num_outputs, streamout_enabled,
|
||||
export_prim_id, has_user_edgeflags);
|
||||
|
||||
return MAX2(culling_pervertex_lds_bytes, pervertex_lds_bytes);
|
||||
}
|
||||
|
||||
unsigned
|
||||
ac_ngg_get_scratch_lds_size(gl_shader_stage stage,
|
||||
unsigned workgroup_size,
|
||||
|
|
|
|||
|
|
@ -814,7 +814,7 @@ radv_lower_ngg(struct radv_device *device, struct radv_shader_stage *ngg_stage,
|
|||
options.export_primitive_id_per_prim = info->outinfo.export_prim_id_per_primitive;
|
||||
options.instance_rate_inputs = gfx_state->vi.instance_rate_inputs << VERT_ATTRIB_GENERIC0;
|
||||
|
||||
NIR_PASS(_, nir, ac_nir_lower_ngg_nogs, &options);
|
||||
NIR_PASS(_, nir, ac_nir_lower_ngg_nogs, &options, &ngg_stage->info.ngg_lds_vertex_size);
|
||||
} else if (nir->info.stage == MESA_SHADER_GEOMETRY) {
|
||||
assert(info->is_ngg);
|
||||
|
||||
|
|
|
|||
|
|
@ -1424,7 +1424,6 @@ static unsigned
|
|||
gfx10_get_ngg_scratch_lds_base(const struct radv_device *device, const struct radv_shader_info *es_info,
|
||||
const struct radv_shader_info *gs_info, const struct gfx10_ngg_info *ngg_info)
|
||||
{
|
||||
const struct radv_physical_device *pdev = radv_device_physical(device);
|
||||
uint32_t scratch_lds_base;
|
||||
|
||||
if (gs_info) {
|
||||
|
|
@ -1433,17 +1432,8 @@ gfx10_get_ngg_scratch_lds_base(const struct radv_device *device, const struct ra
|
|||
|
||||
scratch_lds_base = ALIGN(esgs_ring_lds_bytes + gs_total_out_vtx_bytes, 8u /* for the repacking code */);
|
||||
} else {
|
||||
const bool uses_instanceid = es_info->vs.needs_instance_id;
|
||||
const bool uses_primitive_id = es_info->uses_prim_id;
|
||||
const bool streamout_enabled = es_info->so.enabled_stream_buffers_mask && pdev->use_ngg_streamout;
|
||||
const uint32_t num_outputs =
|
||||
es_info->stage == MESA_SHADER_VERTEX ? es_info->vs.num_outputs : es_info->tes.num_outputs;
|
||||
unsigned pervertex_lds_bytes = ac_ngg_nogs_get_pervertex_lds_size(
|
||||
es_info->stage, num_outputs, streamout_enabled, es_info->outinfo.export_prim_id, false, /* user edge flag */
|
||||
es_info->has_ngg_culling, uses_instanceid, uses_primitive_id);
|
||||
|
||||
assert(ngg_info->hw_max_esverts <= 256);
|
||||
unsigned total_es_lds_bytes = pervertex_lds_bytes * ngg_info->hw_max_esverts;
|
||||
unsigned total_es_lds_bytes = es_info->ngg_lds_vertex_size * ngg_info->hw_max_esverts;
|
||||
|
||||
scratch_lds_base = ALIGN(total_es_lds_bytes, 8u);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -100,6 +100,7 @@ struct radv_shader_info {
|
|||
bool has_xfb_query;
|
||||
uint32_t num_tess_patches;
|
||||
uint32_t esgs_itemsize; /* Only for VS or TES as ES */
|
||||
uint32_t ngg_lds_vertex_size;
|
||||
struct radv_vs_output_info outinfo;
|
||||
unsigned workgroup_size;
|
||||
bool force_vrs_per_vertex;
|
||||
|
|
|
|||
|
|
@ -110,19 +110,7 @@ retry_select_mode:
|
|||
}
|
||||
} else {
|
||||
/* VS and TES. */
|
||||
bool uses_primitive_id = gs_sel->info.uses_primid;
|
||||
|
||||
if (gs_stage == MESA_SHADER_TESS_EVAL)
|
||||
uses_primitive_id |= shader->key.ge.mono.u.vs_export_prim_id;
|
||||
|
||||
esvert_lds_size = ac_ngg_nogs_get_pervertex_lds_size(
|
||||
gs_stage, gs_sel->info.num_outputs,
|
||||
shader->info.num_streamout_vec4s != 0,
|
||||
shader->key.ge.mono.u.vs_export_prim_id,
|
||||
gfx10_ngg_writes_user_edgeflags(shader),
|
||||
si_shader_culling_enabled(shader),
|
||||
shader->info.uses_instance_id,
|
||||
uses_primitive_id) / 4;
|
||||
esvert_lds_size = shader->info.ngg_lds_vertex_size / 4;
|
||||
}
|
||||
|
||||
unsigned max_gsprims = max_gsprims_base;
|
||||
|
|
|
|||
|
|
@ -1172,7 +1172,7 @@ static void si_lower_ngg(struct si_shader *shader, nir_shader *nir,
|
|||
options.instance_rate_inputs = instance_rate_inputs;
|
||||
options.user_clip_plane_enable_mask = clip_plane_enable;
|
||||
|
||||
NIR_PASS_V(nir, ac_nir_lower_ngg_nogs, &options);
|
||||
NIR_PASS_V(nir, ac_nir_lower_ngg_nogs, &options, &shader->info.ngg_lds_vertex_size);
|
||||
} else {
|
||||
assert(nir->info.stage == MESA_SHADER_GEOMETRY);
|
||||
|
||||
|
|
|
|||
|
|
@ -240,6 +240,7 @@ struct si_shader_variant_info {
|
|||
uint8_t num_streamout_vec4s;
|
||||
unsigned private_mem_vgprs;
|
||||
unsigned max_simd_waves;
|
||||
uint32_t ngg_lds_vertex_size;
|
||||
};
|
||||
|
||||
#endif
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue