radeonsi/gfx10: update the tunable max_esverts_base for NGG

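max_esverts_base is lowered from 256 to 128. We also have to fix the
subgroup computation so as not to break quads: si_get_input_prim() now
takes a default worst-case primitive type, and
gfx10_ngg_calculate_subgroup_info() passes PIPE_PRIM_QUADS.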

Acked-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Reviewed-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
This commit is contained in:
Marek Olšák 2019-07-05 17:53:47 -04:00
parent 79d56e6a4a
commit e08463ac22
3 changed files with 12 additions and 7 deletions

View file

@@ -1265,8 +1265,10 @@ void gfx10_ngg_calculate_subgroup_info(struct si_shader *shader)
 shader->previous_stage_sel ? shader->previous_stage_sel : gs_sel;
 const enum pipe_shader_type gs_type = gs_sel->type;
 const unsigned gs_num_invocations = MAX2(gs_sel->gs_num_invocations, 1);
-const unsigned input_prim = si_get_input_prim(gs_sel);
+/* TODO: Use QUADS as the worst case because of reuse, but triangles
+* will always have 1 additional unoccupied vector lane. We could use
+* that lane if the worst case was TRIANGLES. */
+const unsigned input_prim = si_get_input_prim(gs_sel, PIPE_PRIM_QUADS);
 const bool use_adjacency = input_prim >= PIPE_PRIM_LINES_ADJACENCY &&
 input_prim <= PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY;
 const unsigned max_verts_per_prim = u_vertices_per_prim(input_prim);
@@ -1294,7 +1296,7 @@ void gfx10_ngg_calculate_subgroup_info(struct si_shader *shader)
 /* All these are per subgroup: */
 bool max_vert_out_per_gs_instance = false;
-unsigned max_esverts_base = 256;
+unsigned max_esverts_base = 128;
 unsigned max_gsprims_base = 128; /* default prim group size clamp */
 /* Hardware has the following non-natural restrictions on the value

View file

@@ -599,7 +599,8 @@ void si_shader_selector_key_vs(struct si_context *sctx,
 struct si_shader_selector *vs,
 struct si_shader_key *key,
 struct si_vs_prolog_bits *prolog_key);
-unsigned si_get_input_prim(const struct si_shader_selector *gs);
+unsigned si_get_input_prim(const struct si_shader_selector *gs,
+unsigned default_worst_case);
 /* si_state_draw.c */
 void si_emit_surface_sync(struct si_context *sctx, struct radeon_cmdbuf *cs,

View file

@@ -1064,7 +1064,8 @@ static void si_set_ge_pc_alloc(struct si_screen *sscreen,
 S_030980_NUM_PC_LINES((culling ? 256 : 128) * sscreen->info.max_se - 1));
 }
-unsigned si_get_input_prim(const struct si_shader_selector *gs)
+unsigned si_get_input_prim(const struct si_shader_selector *gs,
+unsigned default_worst_case)
 {
 if (gs->type == PIPE_SHADER_GEOMETRY)
 return gs->info.properties[TGSI_PROPERTY_GS_INPUT_PRIM];
@@ -1078,7 +1079,7 @@ unsigned si_get_input_prim(const struct si_shader_selector *gs)
 }
 /* TODO: Set this correctly if the primitive type is set in the shader key. */
-return PIPE_PRIM_TRIANGLES;
+return default_worst_case;
 }
 /**
@@ -1101,7 +1102,8 @@ static void gfx10_shader_ngg(struct si_screen *sscreen, struct si_shader *shader
 gs_info->properties[TGSI_PROPERTY_VS_WINDOW_SPACE_POSITION];
 bool es_enable_prim_id = shader->key.mono.u.vs_export_prim_id || es_info->uses_primid;
 unsigned gs_num_invocations = MAX2(gs_sel->gs_num_invocations, 1);
-unsigned input_prim = si_get_input_prim(gs_sel);
+/* Anything above TRIANGLES has the same effect as TRIANGLES here. */
+unsigned input_prim = si_get_input_prim(gs_sel, PIPE_PRIM_TRIANGLES);
 bool break_wave_at_eoi = false;
 struct si_pm4_state *pm4 = si_get_shader_pm4_state(shader);
 if (!pm4)