diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c index c9df3cca914..4c53477e92b 100644 --- a/src/gallium/drivers/radeonsi/si_shader.c +++ b/src/gallium/drivers/radeonsi/si_shader.c @@ -1289,7 +1289,8 @@ static void si_dump_shader_key(const struct si_shader *shader, FILE *f) bool si_vs_needs_prolog(const struct si_shader_selector *sel, const struct si_vs_prolog_bits *prolog_key, - const union si_shader_key *key, bool ngg_cull_shader) + const union si_shader_key *key, bool ngg_cull_shader, + bool is_gs) { assert(sel->info.stage == MESA_SHADER_VERTEX); @@ -1297,7 +1298,7 @@ bool si_vs_needs_prolog(const struct si_shader_selector *sel, * VS prolog. */ return sel->vs_needs_prolog || prolog_key->ls_vgpr_fix || /* The 2nd VS prolog loads input VGPRs from LDS */ - (key->ge.opt.ngg_culling && !ngg_cull_shader); + (key->ge.opt.ngg_culling && !ngg_cull_shader && !is_gs); } /** @@ -1323,7 +1324,8 @@ void si_get_vs_prolog_key(const struct si_shader_info *info, unsigned num_input_ key->vs_prolog.as_es = shader_out->key.ge.as_es; key->vs_prolog.as_ngg = shader_out->key.ge.as_ngg; - if (!ngg_cull_shader && shader_out->key.ge.opt.ngg_culling) + if (shader_out->selector->info.stage != MESA_SHADER_GEOMETRY && + !ngg_cull_shader && shader_out->key.ge.opt.ngg_culling) key->vs_prolog.load_vgprs_after_culling = 1; if (shader_out->selector->info.stage == MESA_SHADER_TESS_CTRL) { @@ -1652,7 +1654,8 @@ static bool si_get_vs_prolog(struct si_screen *sscreen, struct ac_llvm_compiler { struct si_shader_selector *vs = main_part->selector; - if (!si_vs_needs_prolog(vs, key, &shader->key, false)) + if (!si_vs_needs_prolog(vs, key, &shader->key, false, + shader->selector->info.stage == MESA_SHADER_GEOMETRY)) return true; /* Get the prolog. */ diff --git a/src/gallium/drivers/radeonsi/si_shader_internal.h b/src/gallium/drivers/radeonsi/si_shader_internal.h index 2acdd838bbf..4a7d216b77a 100644 --- a/src/gallium/drivers/radeonsi/si_shader_internal.h +++ b/src/gallium/drivers/radeonsi/si_shader_internal.h @@ -171,7 +171,7 @@ void si_init_shader_args(struct si_shader_context *ctx, bool ngg_cull_shader); unsigned si_get_max_workgroup_size(const struct si_shader *shader); bool si_vs_needs_prolog(const struct si_shader_selector *sel, const struct si_vs_prolog_bits *prolog_key, - const union si_shader_key *key, bool ngg_cull_shader); + const union si_shader_key *key, bool ngg_cull_shader, bool is_gs); void si_get_vs_prolog_key(const struct si_shader_info *info, unsigned num_input_sgprs, bool ngg_cull_shader, const struct si_vs_prolog_bits *prolog_key, struct si_shader *shader_out, union si_shader_part_key *key); diff --git a/src/gallium/drivers/radeonsi/si_shader_llvm.c b/src/gallium/drivers/radeonsi/si_shader_llvm.c index dd944e7f8b5..063b24d50e1 100644 --- a/src/gallium/drivers/radeonsi/si_shader_llvm.c +++ b/src/gallium/drivers/radeonsi/si_shader_llvm.c @@ -955,7 +955,8 @@ bool si_llvm_translate_nir(struct si_shader_context *ctx, struct si_shader *shad if ((!shader->is_monolithic || no_wrapper_func) && (ctx->stage == MESA_SHADER_TESS_EVAL || (ctx->stage == MESA_SHADER_VERTEX && - !si_vs_needs_prolog(sel, &shader->key.ge.part.vs.prolog, &shader->key, ngg_cull_shader)))) + !si_vs_needs_prolog(sel, &shader->key.ge.part.vs.prolog, &shader->key, ngg_cull_shader, + false)))) ac_init_exec_full_mask(&ctx->ac); /* NGG VS and NGG TES: Send gs_alloc_req and the prim export at the beginning to decrease @@ -1113,7 +1114,7 @@ bool si_llvm_compile_shader(struct si_screen *sscreen, struct ac_llvm_compiler * LLVMValueRef main_fn = ctx.main_fn; if (ngg_cull_main_fn) { - if (si_vs_needs_prolog(sel, &shader->key.ge.part.vs.prolog, &shader->key, true)) { + if (si_vs_needs_prolog(sel, &shader->key.ge.part.vs.prolog, &shader->key, true, false)) { union si_shader_part_key prolog_key; si_get_vs_prolog_key(&sel->info, shader->info.num_input_sgprs, true, &shader->key.ge.part.vs.prolog, shader, &prolog_key); @@ -1125,7 +1126,7 @@ bool si_llvm_compile_shader(struct si_screen *sscreen, struct ac_llvm_compiler * parts[num_parts++] = ngg_cull_main_fn; } - if (si_vs_needs_prolog(sel, &shader->key.ge.part.vs.prolog, &shader->key, false)) { + if (si_vs_needs_prolog(sel, &shader->key.ge.part.vs.prolog, &shader->key, false, false)) { union si_shader_part_key prolog_key; si_get_vs_prolog_key(&sel->info, shader->info.num_input_sgprs, false, &shader->key.ge.part.vs.prolog, shader, &prolog_key); @@ -1162,7 +1163,7 @@ bool si_llvm_compile_shader(struct si_screen *sscreen, struct ac_llvm_compiler * struct si_shader_selector *ls = shader->key.ge.part.tcs.ls; LLVMValueRef parts[4]; bool vs_needs_prolog = - si_vs_needs_prolog(ls, &shader->key.ge.part.tcs.ls_prolog, &shader->key, false); + si_vs_needs_prolog(ls, &shader->key.ge.part.tcs.ls_prolog, &shader->key, false, false); /* TCS main part */ parts[2] = ctx.main_fn; @@ -1254,7 +1255,7 @@ bool si_llvm_compile_shader(struct si_screen *sscreen, struct ac_llvm_compiler * /* ES prolog */ if (es->info.stage == MESA_SHADER_VERTEX && - si_vs_needs_prolog(es, &shader->key.ge.part.gs.vs_prolog, &shader->key, false)) { + si_vs_needs_prolog(es, &shader->key.ge.part.gs.vs_prolog, &shader->key, false, true)) { union si_shader_part_key vs_prolog_key; si_get_vs_prolog_key(&es->info, shader_es.info.num_input_sgprs, false, &shader->key.ge.part.gs.vs_prolog, shader, &vs_prolog_key);