radeonsi: determine MEM_ORDERED after generating a shader variant

This is needed because si_get_nir_shader runs NIR passes, and some of them can
introduce new loads.

Fixes: 3fb77ef2e0 - radeonsi: do opt_large_constants & lower_indirect_derefs after uniform inlining

Reviewed-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/14528>
This commit is contained in:
Marek Olšák 2022-01-13 02:02:45 -05:00 committed by Marge Bot
parent e5dd32a48c
commit afdfcdd542
5 changed files with 26 additions and 26 deletions

View file

@ -1488,6 +1488,15 @@ struct nir_shader *si_get_nir_shader(struct si_shader_selector *sel,
return nir;
}
/**
 * Merge the VMEM usage of a NIR shader into the shader's binary info.
 *
 * Scans \p nir with si_nir_scan_shader() and raises
 * shader->info.uses_vmem_load_other and
 * shader->info.uses_vmem_sampler_or_bvh when the scan reports them.
 * The flags are only ever set here, never cleared, so repeated calls
 * accumulate usage across several NIR shaders.
 *
 * \param shader  shader whose binary info is updated
 * \param nir     NIR shader to scan
 */
void si_update_shader_binary_info(struct si_shader *shader, nir_shader *nir)
{
   /* No initializer needed: si_nir_scan_shader() zeroes the struct itself. */
   struct si_shader_info scanned;

   si_nir_scan_shader(nir, &scanned);

   if (scanned.uses_vmem_load_other)
      shader->info.uses_vmem_load_other = true;
   if (scanned.uses_vmem_sampler_or_bvh)
      shader->info.uses_vmem_sampler_or_bvh = true;
}
bool si_compile_shader(struct si_screen *sscreen, struct ac_llvm_compiler *compiler,
struct si_shader *shader, struct pipe_debug_callback *debug)
{
@ -1508,6 +1517,8 @@ bool si_compile_shader(struct si_screen *sscreen, struct ac_llvm_compiler *compi
shader->info.vs_output_ps_input_cntl[i] = SI_PS_INPUT_CNTL_UNUSED;
shader->info.vs_output_ps_input_cntl[VARYING_SLOT_COL0] = SI_PS_INPUT_CNTL_UNUSED_COLOR0;
si_update_shader_binary_info(shader, nir);
shader->info.uses_instanceid = sel->info.uses_instanceid;
shader->info.private_mem_vgprs = DIV_ROUND_UP(nir->scratch_size, 4);
@ -1812,6 +1823,9 @@ void si_get_ps_prolog_key(struct si_shader *shader, union si_shader_part_key *ke
key->ps_prolog.ancillary_vgpr_index = shader->info.ancillary_vgpr_index;
key->ps_prolog.sample_coverage_vgpr_index = shader->info.sample_coverage_vgpr_index;
if (shader->key.ps.part.prolog.poly_stipple)
shader->info.uses_vmem_load_other = true;
if (info->colors_read) {
ubyte *color = shader->selector->color_attr_index;
@ -2101,16 +2115,7 @@ bool si_create_shader_variant(struct si_screen *sscreen, struct ac_llvm_compiler
shader->is_binary_shared = true;
shader->binary = mainp->binary;
shader->config = mainp->config;
shader->info.num_input_sgprs = mainp->info.num_input_sgprs;
shader->info.num_input_vgprs = mainp->info.num_input_vgprs;
shader->info.face_vgpr_index = mainp->info.face_vgpr_index;
shader->info.ancillary_vgpr_index = mainp->info.ancillary_vgpr_index;
shader->info.sample_coverage_vgpr_index = mainp->info.sample_coverage_vgpr_index;
memcpy(shader->info.vs_output_ps_input_cntl, mainp->info.vs_output_ps_input_cntl,
sizeof(mainp->info.vs_output_ps_input_cntl));
shader->info.uses_instanceid = mainp->info.uses_instanceid;
shader->info.nr_pos_exports = mainp->info.nr_pos_exports;
shader->info.nr_param_exports = mainp->info.nr_param_exports;
shader->info = mainp->info;
/* Select prologs and/or epilogs. */
switch (sel->info.stage) {
@ -2188,6 +2193,8 @@ bool si_create_shader_variant(struct si_screen *sscreen, struct ac_llvm_compiler
MAX2(shader->config.scratch_bytes_per_wave,
shader->previous_stage->config.scratch_bytes_per_wave);
shader->info.uses_instanceid |= shader->previous_stage->info.uses_instanceid;
shader->info.uses_vmem_load_other |= shader->previous_stage->info.uses_vmem_load_other;
shader->info.uses_vmem_sampler_or_bvh |= shader->previous_stage->info.uses_vmem_sampler_or_bvh;
}
if (shader->epilog) {
shader->config.num_sgprs =

View file

@ -744,6 +744,8 @@ struct si_shader_binary_info {
uint32_t vs_output_ps_input_cntl[NUM_TOTAL_VARYING_SLOTS];
ubyte num_input_sgprs;
ubyte num_input_vgprs;
bool uses_vmem_load_other; /* all other VMEM loads and atomics with return */
bool uses_vmem_sampler_or_bvh;
signed char face_vgpr_index;
signed char ancillary_vgpr_index;
signed char sample_coverage_vgpr_index;
@ -931,6 +933,7 @@ struct si_shader_part {
};
/* si_shader.c */
void si_update_shader_binary_info(struct si_shader *shader, nir_shader *nir);
bool si_compile_shader(struct si_screen *sscreen, struct ac_llvm_compiler *compiler,
struct si_shader *shader, struct pipe_debug_callback *debug);
bool si_create_shader_variant(struct si_screen *sscreen, struct ac_llvm_compiler *compiler,

View file

@ -567,6 +567,7 @@ void si_nir_scan_shader(const struct nir_shader *nir, struct si_shader_info *inf
{
nir_function *func;
memset(info, 0, sizeof(*info));
info->base = nir->info;
info->stage = nir->info.stage;

View file

@ -1190,6 +1190,7 @@ bool si_llvm_compile_shader(struct si_screen *sscreen, struct ac_llvm_compiler *
shader_ls.is_monolithic = true;
nir = si_get_nir_shader(ls, &shader_ls.key, &free_nir);
si_update_shader_binary_info(shader, nir);
if (!si_llvm_translate_nir(&ctx, &shader_ls, nir, free_nir, false)) {
si_llvm_dispose(&ctx);
@ -1247,6 +1248,7 @@ bool si_llvm_compile_shader(struct si_screen *sscreen, struct ac_llvm_compiler *
shader_es.is_monolithic = true;
nir = si_get_nir_shader(es, &shader_es.key, &free_nir);
si_update_shader_binary_info(shader, nir);
if (!si_llvm_translate_nir(&ctx, &shader_es, nir, free_nir, false)) {
si_llvm_dispose(&ctx);

View file

@ -505,23 +505,10 @@ bool si_shader_mem_ordered(struct si_shader *shader)
if (shader->selector->screen->info.chip_class < GFX10)
return false;
const struct si_shader_info *info = &shader->selector->info;
const struct si_shader_info *prev_info =
shader->previous_stage_sel ? &shader->previous_stage_sel->info : NULL;
bool sampler_or_bvh = info->uses_vmem_sampler_or_bvh;
bool other = info->uses_vmem_load_other ||
shader->config.scratch_bytes_per_wave ||
(info->stage == MESA_SHADER_FRAGMENT &&
shader->key.ps.part.prolog.poly_stipple);
if (prev_info) {
sampler_or_bvh |= prev_info->uses_vmem_sampler_or_bvh;
other |= prev_info->uses_vmem_load_other;
}
/* Return true if both types of VMEM that return something are used. */
return sampler_or_bvh && other;
return shader->info.uses_vmem_sampler_or_bvh &&
(shader->info.uses_vmem_load_other ||
shader->config.scratch_bytes_per_wave);
}
static void si_set_tesseval_regs(struct si_screen *sscreen, const struct si_shader_selector *tes,