radeonsi/gfx10: add as_ngg variant for TES as ES to select Wave32/64

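Legacy GS has to use Wave64, so a TES that runs as ES before a legacy (non-NGG) GS has to use Wave64 too. A TES that runs as ES before an NGG GS keeps the regular GE wave size, so it needs its own as_ngg ES variant.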

Acked-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Reviewed-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
This commit is contained in:
Marek Olšák 2019-07-12 19:49:30 -04:00
parent 88efb63caf
commit 8f72f137ad
4 changed files with 32 additions and 15 deletions

View file

@ -1895,13 +1895,14 @@ static inline bool si_compute_prim_discard_enabled(struct si_context *sctx)
static inline unsigned si_get_wave_size(struct si_screen *sscreen, static inline unsigned si_get_wave_size(struct si_screen *sscreen,
enum pipe_shader_type shader_type, enum pipe_shader_type shader_type,
bool ngg) bool ngg, bool es)
{ {
if (shader_type == PIPE_SHADER_COMPUTE) if (shader_type == PIPE_SHADER_COMPUTE)
return sscreen->compute_wave_size; return sscreen->compute_wave_size;
else if (shader_type == PIPE_SHADER_FRAGMENT) else if (shader_type == PIPE_SHADER_FRAGMENT)
return sscreen->ps_wave_size; return sscreen->ps_wave_size;
else if (shader_type == PIPE_SHADER_GEOMETRY && !ngg) /* legacy GS only supports Wave64 */ else if ((shader_type == PIPE_SHADER_TESS_EVAL && es && !ngg) ||
(shader_type == PIPE_SHADER_GEOMETRY && !ngg)) /* legacy GS only supports Wave64 */
return 64; return 64;
else else
return sscreen->ge_wave_size; return sscreen->ge_wave_size;
@ -1910,7 +1911,7 @@ static inline unsigned si_get_wave_size(struct si_screen *sscreen,
static inline unsigned si_get_shader_wave_size(struct si_shader *shader) static inline unsigned si_get_shader_wave_size(struct si_shader *shader)
{ {
return si_get_wave_size(shader->selector->screen, shader->selector->type, return si_get_wave_size(shader->selector->screen, shader->selector->type,
shader->key.as_ngg); shader->key.as_ngg, shader->key.as_es);
} }
#define PRINT_ERR(fmt, args...) \ #define PRINT_ERR(fmt, args...) \

View file

@ -5727,7 +5727,7 @@ si_generate_gs_copy_shader(struct si_screen *sscreen,
shader->is_gs_copy_shader = true; shader->is_gs_copy_shader = true;
si_init_shader_ctx(&ctx, sscreen, compiler, si_init_shader_ctx(&ctx, sscreen, compiler,
si_get_wave_size(sscreen, PIPE_SHADER_VERTEX, false)); si_get_wave_size(sscreen, PIPE_SHADER_VERTEX, false, false));
ctx.shader = shader; ctx.shader = shader;
ctx.type = PIPE_SHADER_VERTEX; ctx.type = PIPE_SHADER_VERTEX;
@ -6172,7 +6172,8 @@ static bool si_compile_tgsi_main(struct si_shader_context *ctx)
} }
} }
if (shader->key.as_ngg && ctx->type != PIPE_SHADER_GEOMETRY) { if (ctx->type != PIPE_SHADER_GEOMETRY &&
(shader->key.as_ngg && !shader->key.as_es)) {
/* Unconditionally declare scratch space base for streamout and /* Unconditionally declare scratch space base for streamout and
* vertex compaction. Whether space is actually allocated is * vertex compaction. Whether space is actually allocated is
* determined during linking / PM4 creation. * determined during linking / PM4 creation.
@ -6219,13 +6220,13 @@ static bool si_compile_tgsi_main(struct si_shader_context *ctx)
ctx->param_merged_wave_info, 0); ctx->param_merged_wave_info, 0);
} else if (ctx->type == PIPE_SHADER_TESS_CTRL || } else if (ctx->type == PIPE_SHADER_TESS_CTRL ||
ctx->type == PIPE_SHADER_GEOMETRY || ctx->type == PIPE_SHADER_GEOMETRY ||
shader->key.as_ngg) { (shader->key.as_ngg && !shader->key.as_es)) {
LLVMValueRef num_threads; LLVMValueRef num_threads;
bool nested_barrier; bool nested_barrier;
if (!shader->is_monolithic || if (!shader->is_monolithic ||
(ctx->type == PIPE_SHADER_TESS_EVAL && (ctx->type == PIPE_SHADER_TESS_EVAL &&
shader->key.as_ngg)) (shader->key.as_ngg && !shader->key.as_es)))
ac_init_exec_full_mask(&ctx->ac); ac_init_exec_full_mask(&ctx->ac);
if (ctx->type == PIPE_SHADER_TESS_CTRL || if (ctx->type == PIPE_SHADER_TESS_CTRL ||
@ -7048,6 +7049,7 @@ int si_compile_tgsi_shader(struct si_screen *sscreen,
struct si_shader shader_es = {}; struct si_shader shader_es = {};
shader_es.selector = es; shader_es.selector = es;
shader_es.key.as_es = 1; shader_es.key.as_es = 1;
shader_es.key.as_ngg = shader->key.as_ngg;
shader_es.key.mono = shader->key.mono; shader_es.key.mono = shader->key.mono;
shader_es.key.opt = shader->key.opt; shader_es.key.opt = shader->key.opt;
shader_es.is_monolithic = true; shader_es.is_monolithic = true;
@ -7305,7 +7307,8 @@ si_get_shader_part(struct si_screen *sscreen,
struct si_shader_context ctx; struct si_shader_context ctx;
si_init_shader_ctx(&ctx, sscreen, compiler, si_init_shader_ctx(&ctx, sscreen, compiler,
si_get_wave_size(sscreen, type, shader.key.as_ngg)); si_get_wave_size(sscreen, type, shader.key.as_ngg,
shader.key.as_es));
ctx.shader = &shader; ctx.shader = &shader;
ctx.type = type; ctx.type = type;
@ -7703,10 +7706,15 @@ static bool si_shader_select_gs_parts(struct si_screen *sscreen,
struct pipe_debug_callback *debug) struct pipe_debug_callback *debug)
{ {
if (sscreen->info.chip_class >= GFX9) { if (sscreen->info.chip_class >= GFX9) {
struct si_shader *es_main_part = struct si_shader *es_main_part;
shader->key.part.gs.es->main_shader_part_es; enum pipe_shader_type es_type = shader->key.part.gs.es->type;
if (shader->key.part.gs.es->type == PIPE_SHADER_VERTEX && if (es_type == PIPE_SHADER_TESS_EVAL && shader->key.as_ngg)
es_main_part = shader->key.part.gs.es->main_shader_part_ngg_es;
else
es_main_part = shader->key.part.gs.es->main_shader_part_es;
if (es_type == PIPE_SHADER_VERTEX &&
!si_get_vs_prolog(sscreen, compiler, shader, debug, es_main_part, !si_get_vs_prolog(sscreen, compiler, shader, debug, es_main_part,
&shader->key.part.gs.vs_prolog)) &shader->key.part.gs.vs_prolog))
return false; return false;

View file

@ -337,6 +337,7 @@ struct si_shader_selector {
struct si_shader *main_shader_part_ls; /* as_ls is set in the key */ struct si_shader *main_shader_part_ls; /* as_ls is set in the key */
struct si_shader *main_shader_part_es; /* as_es is set in the key */ struct si_shader *main_shader_part_es; /* as_es is set in the key */
struct si_shader *main_shader_part_ngg; /* as_ngg is set in the key */ struct si_shader *main_shader_part_ngg; /* as_ngg is set in the key */
struct si_shader *main_shader_part_ngg_es; /* for Wave32 TES before legacy GS */
struct si_shader *gs_copy_shader; struct si_shader *gs_copy_shader;
@ -789,6 +790,8 @@ si_get_main_shader_part(struct si_shader_selector *sel,
{ {
if (key->as_ls) if (key->as_ls)
return &sel->main_shader_part_ls; return &sel->main_shader_part_ls;
if (key->as_es && key->as_ngg)
return &sel->main_shader_part_ngg_es;
if (key->as_es) if (key->as_es)
return &sel->main_shader_part_es; return &sel->main_shader_part_es;
if (key->as_ngg) if (key->as_ngg)

View file

@ -1852,10 +1852,11 @@ static inline void si_shader_selector_key(struct pipe_context *ctx,
key->mono.u.ff_tcs_inputs_to_copy = sctx->vs_shader.cso->outputs_written; key->mono.u.ff_tcs_inputs_to_copy = sctx->vs_shader.cso->outputs_written;
break; break;
case PIPE_SHADER_TESS_EVAL: case PIPE_SHADER_TESS_EVAL:
key->as_ngg = stages_key.u.ngg;
if (sctx->gs_shader.cso) if (sctx->gs_shader.cso)
key->as_es = 1; key->as_es = 1;
else { else {
key->as_ngg = stages_key.u.ngg;
si_shader_selector_key_hw_vs(sctx, sel, key); si_shader_selector_key_hw_vs(sctx, sel, key);
if (sctx->ps_shader.cso && sctx->ps_shader.cso->info.uses_primid) if (sctx->ps_shader.cso && sctx->ps_shader.cso->info.uses_primid)
@ -2269,6 +2270,10 @@ current_not_ready:
else else
assert(0); assert(0);
if (sel->type == PIPE_SHADER_GEOMETRY &&
previous_stage_sel->type == PIPE_SHADER_TESS_EVAL)
shader1_key.as_ngg = key->as_ngg;
mtx_lock(&previous_stage_sel->mutex); mtx_lock(&previous_stage_sel->mutex);
ok = si_check_missing_main_part(sscreen, ok = si_check_missing_main_part(sscreen,
previous_stage_sel, previous_stage_sel,
@ -2429,7 +2434,7 @@ static void si_init_shader_selector_async(void *job, int thread_index)
if (sel->nir) { if (sel->nir) {
/* TODO: GS always sets wave size = default. Legacy GS will have /* TODO: GS always sets wave size = default. Legacy GS will have
* incorrect subgroup_size and ballot_bit_size. */ * incorrect subgroup_size and ballot_bit_size. */
si_lower_nir(sel, si_get_wave_size(sscreen, sel->type, true)); si_lower_nir(sel, si_get_wave_size(sscreen, sel->type, true, false));
} }
/* Compile the main shader part for use with a prolog and/or epilog. /* Compile the main shader part for use with a prolog and/or epilog.
@ -2455,9 +2460,9 @@ static void si_init_shader_selector_async(void *job, int thread_index)
sel->so.num_outputs != 0, sel->so.num_outputs != 0,
&shader->key); &shader->key);
if (sscreen->info.chip_class >= GFX10 && if (sscreen->info.chip_class >= GFX10 &&
(((sel->type == PIPE_SHADER_VERTEX || ((sel->type == PIPE_SHADER_VERTEX &&
sel->type == PIPE_SHADER_TESS_EVAL) &&
!shader->key.as_ls && !shader->key.as_es) || !shader->key.as_ls && !shader->key.as_es) ||
sel->type == PIPE_SHADER_TESS_EVAL ||
sel->type == PIPE_SHADER_GEOMETRY)) sel->type == PIPE_SHADER_GEOMETRY))
shader->key.as_ngg = 1; shader->key.as_ngg = 1;