mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-02-19 03:50:26 +01:00
ac/nir: allow smaller workgroups for GS
It's not good for performance, but it's possible to use for debugging. Running single-wave GS workgroups could work around any LDS race conditions. Setting the workgroup size to 64 reliably works around GLCTS *primitive_counter*line failures, indicating streamout data corruption with multi-wave GS workgroups. Reviewed-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/38328>
This commit is contained in:
parent
ff8df8712e
commit
9bd2c6dcb2
4 changed files with 22 additions and 14 deletions
|
|
@ -1349,9 +1349,11 @@ ac_legacy_gs_compute_subgroup_info(enum mesa_prim input_prim, unsigned gs_vertic
|
|||
*/
|
||||
bool
|
||||
ac_ngg_compute_subgroup_info(enum amd_gfx_level gfx_level, mesa_shader_stage es_stage, bool is_gs,
|
||||
enum mesa_prim input_prim, unsigned gs_vertices_out, unsigned gs_invocations,
|
||||
unsigned max_workgroup_size, unsigned wave_size, unsigned esgs_vertex_stride,
|
||||
unsigned ngg_lds_vertex_size, unsigned ngg_lds_scratch_size, bool tess_turns_off_ngg,
|
||||
enum mesa_prim input_prim, unsigned gs_vertices_out,
|
||||
unsigned gs_invocations, unsigned target_workgroup_size,
|
||||
unsigned max_workgroup_size, unsigned wave_size,
|
||||
unsigned esgs_vertex_stride, unsigned ngg_lds_vertex_size,
|
||||
unsigned ngg_lds_scratch_size, bool tess_turns_off_ngg,
|
||||
unsigned max_esgs_lds_padding, ac_ngg_subgroup_info *out)
|
||||
{
|
||||
const unsigned gs_num_invocations = MAX2(gs_invocations, 1);
|
||||
|
|
@ -1373,16 +1375,19 @@ ac_ngg_compute_subgroup_info(enum amd_gfx_level gfx_level, mesa_shader_stage es_
|
|||
bool max_vert_out_per_gs_instance = false;
|
||||
unsigned max_gsprims_base, max_esverts_base;
|
||||
|
||||
max_gsprims_base = max_esverts_base = max_workgroup_size;
|
||||
/* In the worst case, we can run 1 GS invocation per workgroup. */
|
||||
assert(!is_gs || gs_vertices_out <= max_workgroup_size);
|
||||
|
||||
max_gsprims_base = max_esverts_base = target_workgroup_size;
|
||||
|
||||
if (is_gs) {
|
||||
bool force_multi_cycling = false;
|
||||
unsigned max_out_verts_per_gsprim = gs_vertices_out * gs_num_invocations;
|
||||
|
||||
retry_select_mode:
|
||||
if (max_out_verts_per_gsprim <= 256 && !force_multi_cycling) {
|
||||
if (max_out_verts_per_gsprim <= max_workgroup_size && !force_multi_cycling) {
|
||||
if (max_out_verts_per_gsprim) {
|
||||
max_gsprims_base = MIN2(max_gsprims_base, 256 / max_out_verts_per_gsprim);
|
||||
max_gsprims_base = MIN2(max_gsprims_base, max_workgroup_size / max_out_verts_per_gsprim);
|
||||
}
|
||||
} else {
|
||||
/* Use special multi-cycling mode in which each GS
|
||||
|
|
@ -1483,7 +1488,7 @@ retry_select_mode:
|
|||
: is_gs
|
||||
? max_gsprims * gs_num_invocations * gs_vertices_out
|
||||
: max_esverts;
|
||||
assert(max_out_vertices <= 256);
|
||||
assert(max_out_vertices <= max_workgroup_size);
|
||||
|
||||
out->hw_max_esverts = max_esverts;
|
||||
out->max_gsprims = max_gsprims;
|
||||
|
|
@ -1504,6 +1509,6 @@ retry_select_mode:
|
|||
|
||||
/* If asserts are disabled, we use the same conditions to return false */
|
||||
return max_esverts >= max_verts_per_prim && max_gsprims >= 1 &&
|
||||
max_out_vertices <= 256 &&
|
||||
max_out_vertices <= max_workgroup_size &&
|
||||
out->hw_max_esverts >= min_esverts;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -336,9 +336,11 @@ typedef struct {
|
|||
|
||||
bool
|
||||
ac_ngg_compute_subgroup_info(enum amd_gfx_level gfx_level, mesa_shader_stage es_stage, bool is_gs,
|
||||
enum mesa_prim input_prim, unsigned gs_vertices_out, unsigned gs_invocations,
|
||||
unsigned max_workgroup_size, unsigned wave_size, unsigned esgs_vertex_stride,
|
||||
unsigned ngg_lds_vertex_size, unsigned ngg_lds_scratch_size, bool tess_turns_off_ngg,
|
||||
enum mesa_prim input_prim, unsigned gs_vertices_out,
|
||||
unsigned gs_invocations, unsigned target_workgroup_size,
|
||||
unsigned max_workgroup_size, unsigned wave_size,
|
||||
unsigned esgs_vertex_stride, unsigned ngg_lds_vertex_size,
|
||||
unsigned ngg_lds_scratch_size, bool tess_turns_off_ngg,
|
||||
unsigned max_esgs_lds_padding, ac_ngg_subgroup_info *out);
|
||||
|
||||
static unsigned inline
|
||||
|
|
|
|||
|
|
@ -1277,8 +1277,8 @@ gfx10_get_ngg_info(const struct radv_device *device, struct radv_shader_info *es
|
|||
ac_ngg_subgroup_info info;
|
||||
|
||||
ac_ngg_compute_subgroup_info(gfx_level, es_info->stage, !!gs_info, input_prim, gs_vertices_out, gs_num_invocations,
|
||||
128, stage_info->wave_size, es_info->esgs_itemsize, stage_info->ngg_lds_vertex_size,
|
||||
stage_info->ngg_lds_scratch_size, false, 0, &info);
|
||||
128, 256, stage_info->wave_size, es_info->esgs_itemsize,
|
||||
stage_info->ngg_lds_vertex_size, stage_info->ngg_lds_scratch_size, false, 0, &info);
|
||||
|
||||
out->hw_max_esverts = info.hw_max_esverts;
|
||||
out->max_gsprims = info.max_gsprims;
|
||||
|
|
|
|||
|
|
@ -1887,11 +1887,12 @@ bool si_create_shader_variant(struct si_screen *sscreen, struct ac_llvm_compiler
|
|||
const unsigned input_prim = si_get_input_prim(gs_sel, &shader->key, false);
|
||||
unsigned gs_vertices_out = gs_sel->stage == MESA_SHADER_GEOMETRY ? gs_sel->info.base.gs.vertices_out : 0;
|
||||
unsigned gs_invocations = gs_sel->stage == MESA_SHADER_GEOMETRY ? gs_sel->info.base.gs.invocations : 0;
|
||||
unsigned max_workgroup_size = si_get_max_workgroup_size(shader);
|
||||
|
||||
if (!ac_ngg_compute_subgroup_info(gs_sel->screen->info.gfx_level, es_sel->stage,
|
||||
gs_sel->stage == MESA_SHADER_GEOMETRY,
|
||||
input_prim, gs_vertices_out, gs_invocations,
|
||||
si_get_max_workgroup_size(shader), shader->wave_size,
|
||||
max_workgroup_size, max_workgroup_size, shader->wave_size,
|
||||
es_sel->info.esgs_vertex_stride, shader->info.ngg_lds_vertex_size,
|
||||
shader->info.ngg_lds_scratch_size, gs_sel->tess_turns_off_ngg,
|
||||
gs_sel->stage == MESA_SHADER_GEOMETRY ? 255 : 0, &shader->ngg.info)) {
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue