radeonsi: get LS+HS and ES+GS together in get_nir_shader instead of separately

This is a prerequisite for linking merged shaders.

At the beginning of get_nir_shader (renamed to get_nir_shaders), we get
both shaders that are going to be merged, and then we optimize them together
and pass them to LLVM or ACO-specific code as struct si_linked_shaders.

The code setting uses_instanceid is moved because the previous place
doesn't work with this new organization.

Acked-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/32910>
This commit is contained in:
Marek Olšák 2024-12-29 01:32:50 -05:00 committed by Marge Bot
parent b6f13a0397
commit a9e210184b
4 changed files with 128 additions and 106 deletions

View file

@ -2234,11 +2234,12 @@ si_init_gs_output_info(struct si_shader_info *info, struct si_gs_output_info *ou
* better code or lower undesirable representations (like derefs). Lowering passes that prevent
* linking optimizations or destroy shader_info shouldn't be run here.
*/
static bool run_pre_link_optimization_passes(struct si_shader *shader, nir_shader *nir,
bool *opts_not_run)
static bool run_pre_link_optimization_passes(struct si_nir_shader_ctx *ctx, bool *opts_not_run)
{
struct si_shader *shader = ctx->shader;
struct si_shader_selector *sel = shader->selector;
const union si_shader_key *key = &shader->key;
nir_shader *nir = ctx->nir;
bool progress = false;
/* Kill outputs according to the shader key. */
@ -2391,10 +2392,10 @@ static bool run_pre_link_optimization_passes(struct si_shader *shader, nir_shade
* (those should be run before this) because any changes in shader_info won't be reflected
* in hw registers from now on.
*/
static void run_late_optimization_and_lowering_passes(struct si_shader *shader,
struct si_nir_shader_ctx *ctx,
static void run_late_optimization_and_lowering_passes(struct si_nir_shader_ctx *ctx,
bool progress, bool opts_not_run)
{
struct si_shader *shader = ctx->shader;
struct si_shader_selector *sel = shader->selector;
const union si_shader_key *key = &shader->key;
nir_shader *nir = ctx->nir;
@ -2652,9 +2653,8 @@ static void run_late_optimization_and_lowering_passes(struct si_shader *shader,
static void get_input_nir(struct si_shader *shader, struct si_nir_shader_ctx *ctx)
{
struct si_shader_selector *sel = shader->selector;
const union si_shader_key *key = &shader->key;
memset(ctx, 0, sizeof(*ctx));
ctx->shader = shader;
ctx->free_nir = !sel->nir && sel->nir_binary;
ctx->nir = sel->nir ? sel->nir : (sel->nir_binary ? si_deserialize_shader(sel) : NULL);
assert(ctx->nir);
@ -2662,7 +2662,7 @@ static void get_input_nir(struct si_shader *shader, struct si_nir_shader_ctx *ct
if (unlikely(should_print_nir(ctx->nir))) {
/* Modify the shader's name so that each variant gets its own name. */
ctx->nir->info.name = ralloc_asprintf(ctx->nir, "%s-%08x", ctx->nir->info.name,
_mesa_hash_data(key, sizeof(*key)));
_mesa_hash_data(&shader->key, sizeof(shader->key)));
/* Dummy pass to get the starting point. */
printf("nir_dummy_pass\n");
@ -2670,17 +2670,76 @@ static void get_input_nir(struct si_shader *shader, struct si_nir_shader_ctx *ct
}
}
static void get_nir_shader(struct si_shader *shader, struct si_nir_shader_ctx *ctx)
static void get_prev_stage_input_nir(struct si_shader *shader, struct si_linked_shaders *linked)
{
bool opts_not_run = true;
const union si_shader_key *key = &shader->key;
get_input_nir(shader, ctx);
bool progress = run_pre_link_optimization_passes(shader, ctx->nir, &opts_not_run);
if (shader->selector->stage == MESA_SHADER_TESS_CTRL) {
linked->producer_shader.selector = key->ge.part.tcs.ls;
linked->producer_shader.key.ge.as_ls = 1;
} else {
linked->producer_shader.selector = key->ge.part.gs.es;
linked->producer_shader.key.ge.as_es = 1;
linked->producer_shader.key.ge.as_ngg = key->ge.as_ngg;
}
linked->producer_shader.next_shader = shader;
linked->producer_shader.key.ge.mono = key->ge.mono;
linked->producer_shader.key.ge.opt = key->ge.opt;
linked->producer_shader.key.ge.opt.inline_uniforms = false; /* only TCS/GS can inline uniforms */
/* kill_outputs was computed based on second shader's outputs so we can't use it to
* kill first shader's outputs.
*/
linked->producer_shader.key.ge.opt.kill_outputs = 0;
linked->producer_shader.is_monolithic = true;
linked->producer_shader.wave_size = shader->wave_size;
get_input_nir(&linked->producer_shader, &linked->producer);
}
static void get_nir_shaders(struct si_shader *shader, struct si_linked_shaders *linked)
{
memset(linked, 0, sizeof(*linked));
get_input_nir(shader, &linked->consumer);
if (shader->selector->screen->info.gfx_level >= GFX9 && shader->is_monolithic &&
(shader->selector->stage == MESA_SHADER_TESS_CTRL ||
shader->selector->stage == MESA_SHADER_GEOMETRY))
get_prev_stage_input_nir(shader, linked);
bool progress[SI_NUM_LINKED_SHADERS] = {0};
bool opts_not_run[SI_NUM_LINKED_SHADERS] = {true, true};
for (unsigned i = 0; i < SI_NUM_LINKED_SHADERS; i++) {
if (linked->shader[i].nir) {
progress[i] = run_pre_link_optimization_passes(&linked->shader[i], &opts_not_run[i]);
}
}
/* TODO: run linking optimizations here if we have LS+HS or ES+GS */
/* TODO: gather shader_info here */
run_late_optimization_and_lowering_passes(shader, ctx, progress, opts_not_run);
/* TODO: gather shader_info here */
if (shader->selector->stage <= MESA_SHADER_GEOMETRY) {
shader->info.uses_instanceid |=
shader->key.ge.mono.instance_divisor_is_one ||
shader->key.ge.mono.instance_divisor_is_fetched;
if (linked->producer.nir) {
shader->info.uses_instanceid |=
linked->producer.shader->selector->info.uses_instanceid ||
linked->producer.shader->info.uses_instanceid;
}
}
for (unsigned i = 0; i < SI_NUM_LINKED_SHADERS; i++) {
if (linked->shader[i].nir) {
run_late_optimization_and_lowering_passes(&linked->shader[i], progress[i],
opts_not_run[i]);
}
}
if (linked->producer.nir)
si_update_shader_binary_info(shader, linked->producer.nir);
}
void si_update_shader_binary_info(struct si_shader *shader, nir_shader *nir)
@ -2763,12 +2822,16 @@ si_nir_generate_gs_copy_shader(struct si_screen *sscreen,
sscreen->options.vrs2x2,
output_info);
struct si_shader_args args;
si_init_shader_args(shader, &args, &gs_nir->info);
struct si_linked_shaders linked;
memset(&linked, 0, sizeof(linked));
linked.consumer.nir = nir;
NIR_PASS_V(nir, si_nir_lower_abi, shader, &args);
si_init_shader_args(shader, &linked.consumer.args, &gs_nir->info);
NIR_PASS_V(nir, si_nir_lower_abi, shader, &linked.consumer.args);
NIR_PASS_V(nir, ac_nir_lower_intrinsics_to_args, sscreen->info.gfx_level,
sscreen->info.has_ls_vgpr_init_bug, AC_HW_VERTEX_SHADER, 64, 64, &args.ac);
sscreen->info.has_ls_vgpr_init_bug, AC_HW_VERTEX_SHADER, 64, 64,
&linked.consumer.args.ac);
si_nir_opts(gs_selector->screen, nir, false);
@ -2781,10 +2844,9 @@ si_nir_generate_gs_copy_shader(struct si_screen *sscreen,
bool ok =
#if AMD_LLVM_AVAILABLE
!gs_nir->info.use_aco_amd ? si_llvm_compile_shader(sscreen, compiler, shader,
&args, debug, nir) :
!gs_nir->info.use_aco_amd ? si_llvm_compile_shader(sscreen, compiler, shader, &linked, debug) :
#endif
si_aco_compile_shader(shader, &args, nir, debug);
si_aco_compile_shader(shader, &linked, debug);
if (ok) {
assert(!shader->config.scratch_bytes_per_wave);
@ -2974,9 +3036,9 @@ bool si_compile_shader(struct si_screen *sscreen, struct ac_llvm_compiler *compi
if (sel->stage == MESA_SHADER_FRAGMENT && sel->info.base.use_aco_amd)
si_set_spi_ps_input_config(shader);
struct si_nir_shader_ctx ctx;
get_nir_shader(shader, &ctx);
nir_shader *nir = ctx.nir;
struct si_linked_shaders linked;
get_nir_shaders(shader, &linked);
nir_shader *nir = linked.consumer.nir;
/* Dump NIR before doing NIR->LLVM conversion in case the
* conversion fails. */
@ -3027,10 +3089,9 @@ bool si_compile_shader(struct si_screen *sscreen, struct ac_llvm_compiler *compi
ret =
#if AMD_LLVM_AVAILABLE
!nir->info.use_aco_amd ? si_llvm_compile_shader(sscreen, compiler, shader, &ctx.args,
debug, nir) :
!nir->info.use_aco_amd ? si_llvm_compile_shader(sscreen, compiler, shader, &linked, debug) :
#endif
si_aco_compile_shader(shader, &ctx.args, nir, debug);
si_aco_compile_shader(shader, &linked, debug);
if (!ret)
goto out;
@ -3041,7 +3102,7 @@ bool si_compile_shader(struct si_screen *sscreen, struct ac_llvm_compiler *compi
if (nir->info.stage == MESA_SHADER_GEOMETRY && !shader->key.ge.as_ngg) {
shader->gs_copy_shader =
si_nir_generate_gs_copy_shader(sscreen, compiler, shader, nir, debug,
&ctx.legacy_gs_output_info.info);
&linked.consumer.legacy_gs_output_info.info);
if (!shader->gs_copy_shader) {
fprintf(stderr, "radeonsi: can't create GS copy shader\n");
ret = false;
@ -3149,8 +3210,10 @@ bool si_compile_shader(struct si_screen *sscreen, struct ac_llvm_compiler *compi
}
out:
if (ctx.free_nir)
ralloc_free(nir);
for (unsigned i = 0; i < SI_NUM_LINKED_SHADERS; i++) {
if (linked.shader[i].free_nir)
ralloc_free(linked.shader[i].nir);
}
return ret;
}
@ -3680,43 +3743,6 @@ void si_shader_destroy(struct si_shader *shader)
free(shader->shader_log);
}
void si_get_prev_stage_nir_shader(struct si_shader *shader, struct si_shader *prev_shader,
struct si_nir_shader_ctx *ctx)
{
const struct si_shader_selector *sel = shader->selector;
const union si_shader_key *key = &shader->key;
if (sel->stage == MESA_SHADER_TESS_CTRL) {
struct si_shader_selector *ls = key->ge.part.tcs.ls;
prev_shader->selector = ls;
prev_shader->key.ge.as_ls = 1;
} else {
struct si_shader_selector *es = key->ge.part.gs.es;
prev_shader->selector = es;
prev_shader->key.ge.as_es = 1;
prev_shader->key.ge.as_ngg = key->ge.as_ngg;
}
prev_shader->next_shader = shader;
prev_shader->key.ge.mono = key->ge.mono;
prev_shader->key.ge.opt = key->ge.opt;
prev_shader->key.ge.opt.inline_uniforms = false; /* only TCS/GS can inline uniforms */
/* kill_outputs was computed based on second shader's outputs so we can't use it to
* kill first shader's outputs.
*/
prev_shader->key.ge.opt.kill_outputs = 0;
prev_shader->is_monolithic = true;
prev_shader->wave_size = shader->wave_size;
get_nir_shader(prev_shader, ctx);
si_update_shader_binary_info(shader, ctx->nir);
shader->info.uses_instanceid |=
prev_shader->selector->info.uses_instanceid || prev_shader->info.uses_instanceid;
}
void si_get_ps_prolog_args(struct si_shader_args *args,
const union si_shader_part_key *key)
{

View file

@ -143,42 +143,33 @@ si_aco_build_shader_binary(void **data, const struct ac_shader_config *config,
}
bool
si_aco_compile_shader(struct si_shader *shader,
struct si_shader_args *args,
struct nir_shader *nir,
si_aco_compile_shader(struct si_shader *shader, struct si_linked_shaders *linked,
struct util_debug_callback *debug)
{
const struct si_shader_selector *sel = shader->selector;
nir_shader *nir = linked->consumer.nir;
struct aco_compiler_options options = {0};
si_fill_aco_options(sel->screen, nir->info.stage, &options, debug);
struct aco_shader_info info = {0};
si_fill_aco_shader_info(shader, &info, args);
si_fill_aco_shader_info(shader, &info, &linked->consumer.args);
const struct ac_shader_args *args = &linked->consumer.args.ac;
nir_shader *shaders[2];
unsigned num_shaders = 0;
struct si_shader prev_shader = {};
struct si_nir_shader_ctx prev_ctx;
prev_ctx.free_nir = false;
/* For merged shader stage. */
if (shader->is_monolithic && sel->screen->info.gfx_level >= GFX9 &&
(nir->info.stage == MESA_SHADER_TESS_CTRL || nir->info.stage == MESA_SHADER_GEOMETRY)) {
si_get_prev_stage_nir_shader(shader, &prev_shader, &prev_ctx);
shaders[num_shaders++] = prev_ctx.nir;
args = &prev_ctx.args;
if (linked->producer.nir) {
shaders[num_shaders++] = linked->producer.nir;
args = &linked->producer.args.ac;
}
shaders[num_shaders++] = nir;
aco_compile_shader(&options, &info, num_shaders, shaders, &args->ac,
aco_compile_shader(&options, &info, num_shaders, shaders, args,
si_aco_build_shader_binary, (void **)shader);
if (prev_ctx.free_nir)
ralloc_free(shaders[0]);
return true;
}

View file

@ -92,12 +92,28 @@ struct si_gs_output_info {
};
struct si_nir_shader_ctx {
struct si_shader *shader;
struct si_shader_args args;
struct si_gs_output_info legacy_gs_output_info;
nir_shader *nir;
bool free_nir;
};
#define SI_NUM_LINKED_SHADERS 2
struct si_linked_shaders {
/* Temporary si_shader for the first shader of merged shaders. */
struct si_shader producer_shader;
union {
struct {
struct si_nir_shader_ctx producer;
struct si_nir_shader_ctx consumer;
};
struct si_nir_shader_ctx shader[SI_NUM_LINKED_SHADERS];
};
};
struct nir_builder;
typedef struct nir_builder nir_builder;
@ -110,8 +126,6 @@ bool si_is_merged_shader(struct si_shader *shader);
unsigned si_get_max_workgroup_size(const struct si_shader *shader);
enum ac_hw_stage si_select_hw_stage(const gl_shader_stage stage, const union si_shader_key *const key,
const enum amd_gfx_level gfx_level);
void si_get_prev_stage_nir_shader(struct si_shader *shader, struct si_shader *prev_shader,
struct si_nir_shader_ctx *ctx);
void si_get_ps_prolog_args(struct si_shader_args *args,
const union si_shader_part_key *key);
void si_get_ps_epilog_args(struct si_shader_args *args,
@ -143,17 +157,15 @@ bool si_nir_lower_vs_inputs(nir_shader *nir, struct si_shader *shader,
/* si_shader_llvm.c */
bool si_llvm_compile_shader(struct si_screen *sscreen, struct ac_llvm_compiler *compiler,
struct si_shader *shader, struct si_shader_args *args,
struct util_debug_callback *debug, struct nir_shader *nir);
struct si_shader *shader, struct si_linked_shaders *linked,
struct util_debug_callback *debug);
bool si_llvm_build_shader_part(struct si_screen *sscreen, gl_shader_stage stage,
bool prolog, struct ac_llvm_compiler *compiler,
struct util_debug_callback *debug, const char *name,
struct si_shader_part *result);
/* si_shader_aco.c */
bool si_aco_compile_shader(struct si_shader *shader,
struct si_shader_args *args,
struct nir_shader *nir,
bool si_aco_compile_shader(struct si_shader *shader, struct si_linked_shaders *linked,
struct util_debug_callback *debug);
void si_aco_resolve_symbols(struct si_shader *shader, uint32_t *code_for_write,
const uint32_t *code_for_read, uint64_t scratch_va,

View file

@ -529,7 +529,7 @@ static LLVMValueRef si_llvm_load_sampler_desc(struct ac_shader_abi *abi, LLVMVal
}
static bool si_llvm_translate_nir(struct si_shader_context *ctx, struct si_shader *shader,
struct nir_shader *nir, bool free_nir)
struct nir_shader *nir)
{
struct si_shader_selector *sel = shader->selector;
const struct si_shader_info *info = &sel->info;
@ -764,18 +764,16 @@ static bool si_llvm_translate_nir(struct si_shader_context *ctx, struct si_shade
}
si_llvm_build_ret(ctx, ctx->return_value);
if (free_nir)
ralloc_free(nir);
return true;
}
bool si_llvm_compile_shader(struct si_screen *sscreen, struct ac_llvm_compiler *compiler,
struct si_shader *shader, struct si_shader_args *args,
struct util_debug_callback *debug, struct nir_shader *nir)
struct si_shader *shader, struct si_linked_shaders *linked,
struct util_debug_callback *debug)
{
struct si_shader_selector *sel = shader->selector;
struct si_shader_context ctx;
nir_shader *nir = linked->consumer.nir;
enum ac_float_mode float_mode = nir->info.stage == MESA_SHADER_KERNEL ?
AC_FLOAT_MODE_DEFAULT : AC_FLOAT_MODE_DEFAULT_OPENGL;
bool exports_color_null = false;
@ -792,27 +790,22 @@ bool si_llvm_compile_shader(struct si_screen *sscreen, struct ac_llvm_compiler *
si_llvm_context_init(&ctx, sscreen, compiler, shader->wave_size, exports_color_null, exports_mrtz,
float_mode);
ctx.args = args;
ctx.args = &linked->consumer.args;
if (!si_llvm_translate_nir(&ctx, shader, nir, false)) {
if (!si_llvm_translate_nir(&ctx, shader, nir)) {
si_llvm_dispose(&ctx);
return false;
}
/* For merged shader stage. */
if (shader->is_monolithic && sscreen->info.gfx_level >= GFX9 &&
(nir->info.stage == MESA_SHADER_TESS_CTRL || nir->info.stage == MESA_SHADER_GEOMETRY)) {
if (linked->producer.nir) {
/* LS or ES shader. */
struct si_shader prev_shader = {};
struct si_nir_shader_ctx prev_nir_ctx;
si_get_prev_stage_nir_shader(shader, &prev_shader, &prev_nir_ctx);
ctx.args = &prev_nir_ctx.args;
ctx.args = &linked->producer.args;
struct ac_llvm_pointer parts[2];
parts[1] = ctx.main_fn;
if (!si_llvm_translate_nir(&ctx, &prev_shader, prev_nir_ctx.nir, prev_nir_ctx.free_nir)) {
if (!si_llvm_translate_nir(&ctx, linked->producer.shader, linked->producer.nir)) {
si_llvm_dispose(&ctx);
return false;
}