radeonsi: add ACO-specific main shader parts

We can't have merged shaders where the first part is compiled using ACO
and the second part is compiled using LLVM.

Add ACO-specific main shader parts to fix that.

This happens when ACO is enabled for gfx12 streamout where GS can be paired
with a previous shader compiled by LLVM.

Fixes: 8ba718fb7d - radeonsi/gfx12: use ACO for streamout because it's faster

Reviewed-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/34491>
This commit is contained in:
Marek Olšák 2025-04-10 13:38:26 -04:00 committed by Marge Bot
parent 4865ac57cc
commit 7f7d6deb18
3 changed files with 60 additions and 24 deletions

View file

@ -769,6 +769,7 @@ static bool si_shader_binary_open(struct si_screen *screen, struct si_shader *sh
#define add_part(shader_or_part) \ #define add_part(shader_or_part) \
if (shader_or_part) { \ if (shader_or_part) { \
assert(shader_or_part->binary.type == SI_SHADER_BINARY_ELF); \
part_elfs[num_parts] = (shader_or_part)->binary.code_buffer; \ part_elfs[num_parts] = (shader_or_part)->binary.code_buffer; \
part_sizes[num_parts] = (shader_or_part)->binary.code_size; \ part_sizes[num_parts] = (shader_or_part)->binary.code_size; \
num_parts++; \ num_parts++; \
@ -2609,6 +2610,11 @@ static void get_input_nir(struct si_shader *shader, struct si_nir_shader_ctx *ct
ctx->nir = sel->nir ? sel->nir : (sel->nir_binary ? si_deserialize_shader(sel) : NULL); ctx->nir = sel->nir ? sel->nir : (sel->nir_binary ? si_deserialize_shader(sel) : NULL);
assert(ctx->nir); assert(ctx->nir);
if (sel->stage <= MESA_SHADER_GEOMETRY)
ctx->nir->info.use_aco_amd = shader->key.ge.use_aco;
assert(ctx->nir->info.use_aco_amd == si_shader_uses_aco(shader));
if (unlikely(should_print_nir(ctx->nir))) { if (unlikely(should_print_nir(ctx->nir))) {
/* Modify the shader's name so that each variant gets its own name. */ /* Modify the shader's name so that each variant gets its own name. */
ctx->nir->info.name = ralloc_asprintf(ctx->nir, "%s-%08x", ctx->nir->info.name, ctx->nir->info.name = ralloc_asprintf(ctx->nir, "%s-%08x", ctx->nir->info.name,
@ -2632,6 +2638,7 @@ static void get_prev_stage_input_nir(struct si_shader *shader, struct si_linked_
linked->producer_shader.key.ge.as_es = 1; linked->producer_shader.key.ge.as_es = 1;
linked->producer_shader.key.ge.as_ngg = key->ge.as_ngg; linked->producer_shader.key.ge.as_ngg = key->ge.as_ngg;
} }
linked->producer_shader.key.ge.use_aco = key->ge.use_aco;
linked->producer_shader.next_shader = shader; linked->producer_shader.next_shader = shader;
linked->producer_shader.key.ge.mono = key->ge.mono; linked->producer_shader.key.ge.mono = key->ge.mono;
@ -2723,7 +2730,7 @@ static void
si_get_shader_variant_info(struct si_shader *shader, nir_shader *nir) si_get_shader_variant_info(struct si_shader *shader, nir_shader *nir)
{ {
nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir)); nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir));
assert(shader->selector->info.base.use_aco_amd == nir->info.use_aco_amd); assert(nir->info.use_aco_amd == si_shader_uses_aco(shader));
const BITSET_WORD *sysvals = nir->info.system_values_read; const BITSET_WORD *sysvals = nir->info.system_values_read;
/* ACO needs spi_ps_input_ena before si_init_shader_args. */ /* ACO needs spi_ps_input_ena before si_init_shader_args. */
@ -2899,7 +2906,11 @@ static void get_nir_shaders(struct si_shader *shader, struct si_linked_shaders *
for (unsigned i = 0; i < SI_NUM_LINKED_SHADERS; i++) { for (unsigned i = 0; i < SI_NUM_LINKED_SHADERS; i++) {
if (linked->shader[i].nir) { if (linked->shader[i].nir) {
struct si_shader_info info; struct si_shader_info info;
/* Save and restore use_aco_amd because si_nir_scan_shader changes it. */
bool use_aco_amd = linked->shader[i].nir->info.use_aco_amd;
si_nir_scan_shader(shader->selector->screen, linked->shader[i].nir, &info, true); si_nir_scan_shader(shader->selector->screen, linked->shader[i].nir, &info, true);
linked->shader[i].nir->info.use_aco_amd = use_aco_amd;
shader->info.uses_vmem_load_other |= info.uses_vmem_load_other; shader->info.uses_vmem_load_other |= info.uses_vmem_load_other;
shader->info.uses_vmem_sampler_or_bvh |= info.uses_vmem_sampler_or_bvh; shader->info.uses_vmem_sampler_or_bvh |= info.uses_vmem_sampler_or_bvh;
@ -3089,6 +3100,7 @@ bool si_compile_shader(struct si_screen *sscreen, struct ac_llvm_compiler *compi
FLOAT_CONTROLS_DENORM_FLUSH_TO_ZERO_FP64)) FLOAT_CONTROLS_DENORM_FLUSH_TO_ZERO_FP64))
float_mode &= ~V_00B028_FP_16_64_DENORMS; float_mode &= ~V_00B028_FP_16_64_DENORMS;
assert(nir->info.use_aco_amd == si_shader_uses_aco(shader));
ret = ret =
#if AMD_LLVM_AVAILABLE #if AMD_LLVM_AVAILABLE
!nir->info.use_aco_amd ? si_llvm_compile_shader(sscreen, compiler, shader, &linked, debug) : !nir->info.use_aco_amd ? si_llvm_compile_shader(sscreen, compiler, shader, &linked, debug) :
@ -3286,7 +3298,10 @@ static bool si_shader_select_tcs_parts(struct si_screen *sscreen, struct ac_llvm
if (sscreen->info.gfx_level >= GFX9) { if (sscreen->info.gfx_level >= GFX9) {
assert(shader->wave_size == 32 || shader->wave_size == 64); assert(shader->wave_size == 32 || shader->wave_size == 64);
unsigned wave_size_index = shader->wave_size == 64; unsigned wave_size_index = shader->wave_size == 64;
shader->previous_stage = shader->key.ge.part.tcs.ls->main_parts.named.ls[wave_size_index]; shader->previous_stage =
shader->key.ge.part.tcs.ls->main_parts.named.ls[wave_size_index][shader->key.ge.use_aco];
assert(shader->previous_stage->key.ge.use_aco == si_shader_uses_aco(shader));
assert((shader->previous_stage->binary.type == SI_SHADER_BINARY_RAW) == si_shader_uses_aco(shader));
} }
return true; return true;
@ -3302,10 +3317,13 @@ static bool si_shader_select_gs_parts(struct si_screen *sscreen, struct ac_llvm_
if (shader->key.ge.as_ngg) { if (shader->key.ge.as_ngg) {
assert(shader->wave_size == 32 || shader->wave_size == 64); assert(shader->wave_size == 32 || shader->wave_size == 64);
unsigned wave_size_index = shader->wave_size == 64; unsigned wave_size_index = shader->wave_size == 64;
shader->previous_stage = shader->key.ge.part.gs.es->main_parts.named.ngg_es[wave_size_index]; shader->previous_stage =
shader->key.ge.part.gs.es->main_parts.named.ngg_es[wave_size_index][shader->key.ge.use_aco];
} else { } else {
shader->previous_stage = shader->key.ge.part.gs.es->main_parts.named.es; shader->previous_stage = shader->key.ge.part.gs.es->main_parts.named.es[shader->key.ge.use_aco];
} }
assert(shader->previous_stage->key.ge.use_aco == si_shader_uses_aco(shader));
assert((shader->previous_stage->binary.type == SI_SHADER_BINARY_RAW) == si_shader_uses_aco(shader));
} }
return true; return true;

View file

@ -589,12 +589,12 @@ struct si_shader_info {
union si_main_shader_parts { union si_main_shader_parts {
struct si_main_shader_parts_named { struct si_main_shader_parts_named {
/* indices: [wave_size == 64] */ /* indices: [wave_size == 64][use_aco] */
struct si_shader *other[2]; struct si_shader *other[2][2];
struct si_shader *ls[2]; /* as_ls is set in the key */ struct si_shader *ls[2][2]; /* as_ls is set in the key */
struct si_shader *es; /* as_es && !as_ngg in the key */ struct si_shader *es[2]; /* as_es && !as_ngg in the key, always wave64 */
struct si_shader *ngg[2]; /* !as_es && as_ngg in the key */ struct si_shader *ngg[2][2]; /* !as_es && as_ngg in the key */
struct si_shader *ngg_es[2]; /* as_es && as_ngg in the key */ struct si_shader *ngg_es[2][2]; /* as_es && as_ngg in the key */
} named; } named;
struct si_shader *variants[sizeof(struct si_main_shader_parts_named) / sizeof(struct si_shader*)]; struct si_shader *variants[sizeof(struct si_main_shader_parts_named) / sizeof(struct si_shader*)];
}; };
@ -750,6 +750,7 @@ struct si_shader_key_ge {
unsigned as_ls : 1; /* whether it's VS before TCS */ unsigned as_ls : 1; /* whether it's VS before TCS */
unsigned as_ngg : 1; /* whether it's the last GE stage and NGG is enabled, unsigned as_ngg : 1; /* whether it's the last GE stage and NGG is enabled,
also set for the stage right before GS */ also set for the stage right before GS */
unsigned use_aco : 1; /* whether the shader variant is using ACO */
/* Flags for monolithic compilation only. */ /* Flags for monolithic compilation only. */
struct { struct {
@ -1118,18 +1119,19 @@ static inline struct si_shader **si_get_main_shader_part(struct si_shader_select
if (sel->stage <= MESA_SHADER_GEOMETRY) { if (sel->stage <= MESA_SHADER_GEOMETRY) {
if (key->ge.as_ls) if (key->ge.as_ls)
return &sel->main_parts.named.ls[wave_size_index]; return &sel->main_parts.named.ls[wave_size_index][key->ge.use_aco];
if (key->ge.as_es && key->ge.as_ngg) if (key->ge.as_es && key->ge.as_ngg)
return &sel->main_parts.named.ngg_es[wave_size_index]; return &sel->main_parts.named.ngg_es[wave_size_index][key->ge.use_aco];
if (key->ge.as_es) { if (key->ge.as_es) {
/* legacy GS only support wave 64 */ /* legacy GS only support wave 64 */
assert(wave_size == 64); assert(wave_size == 64);
return &sel->main_parts.named.es; return &sel->main_parts.named.es[key->ge.use_aco];
} }
if (key->ge.as_ngg) if (key->ge.as_ngg)
return &sel->main_parts.named.ngg[wave_size_index]; return &sel->main_parts.named.ngg[wave_size_index][key->ge.use_aco];
return &sel->main_parts.named.other[wave_size_index][key->ge.use_aco];
} }
return &sel->main_parts.named.other[wave_size_index]; return &sel->main_parts.named.other[wave_size_index][sel->info.base.use_aco_amd];
} }
static inline bool gfx10_has_variable_edgeflags(struct si_shader *shader) static inline bool gfx10_has_variable_edgeflags(struct si_shader *shader)
@ -1175,6 +1177,12 @@ static inline bool si_shader_culling_enabled(struct si_shader *shader)
(output_prim == MESA_PRIM_TRIANGLES || output_prim == MESA_PRIM_LINES); (output_prim == MESA_PRIM_TRIANGLES || output_prim == MESA_PRIM_LINES);
} }
/* Report whether this shader variant is compiled with ACO.
 *
 * For stages up to and including MESA_SHADER_GEOMETRY the answer comes from the
 * variant key (key.ge.use_aco); for every other stage it comes from the
 * selector's info.base.use_aco_amd.
 */
static inline bool si_shader_uses_aco(struct si_shader *shader)
{
   if (shader->selector->stage <= MESA_SHADER_GEOMETRY)
      return shader->key.ge.use_aco;

   return shader->selector->info.base.use_aco_amd;
}
#ifdef __cplusplus #ifdef __cplusplus
} }
#endif #endif

View file

@ -2957,7 +2957,7 @@ static void si_build_shader_variant(struct si_shader *shader, int thread_index,
compiler = &shader->compiler_ctx_state.compiler; compiler = &shader->compiler_ctx_state.compiler;
} }
if (!sel->info.base.use_aco_amd && !*compiler) if (!si_shader_uses_aco(shader) && !*compiler)
*compiler = si_create_llvm_compiler(sscreen); *compiler = si_create_llvm_compiler(sscreen);
if (unlikely(!si_create_shader_variant(sscreen, *compiler, shader, debug))) { if (unlikely(!si_create_shader_variant(sscreen, *compiler, shader, debug))) {
@ -3011,6 +3011,7 @@ static bool si_check_missing_main_part(struct si_screen *sscreen, struct si_shad
main_part->key.ge.as_es = key->ge.as_es; main_part->key.ge.as_es = key->ge.as_es;
main_part->key.ge.as_ls = key->ge.as_ls; main_part->key.ge.as_ls = key->ge.as_ls;
main_part->key.ge.as_ngg = key->ge.as_ngg; main_part->key.ge.as_ngg = key->ge.as_ngg;
main_part->key.ge.use_aco = key->ge.use_aco;
} }
main_part->is_monolithic = false; main_part->is_monolithic = false;
main_part->wave_size = wave_size; main_part->wave_size = wave_size;
@ -3172,11 +3173,11 @@ current_not_ready:
} }
util_queue_fence_init(&shader->ready); util_queue_fence_init(&shader->ready);
shader->selector = sel;
if (!sel->info.base.use_aco_amd && !sctx->compiler) if (!si_shader_uses_aco(shader) && !sctx->compiler)
sctx->compiler = si_create_llvm_compiler(sctx->screen); sctx->compiler = si_create_llvm_compiler(sctx->screen);
shader->selector = sel;
*((SHADER_KEY_TYPE*)&shader->key) = *key; *((SHADER_KEY_TYPE*)&shader->key) = *key;
shader->wave_size = si_determine_wave_size(sscreen, shader); shader->wave_size = si_determine_wave_size(sscreen, shader);
shader->compiler_ctx_state.compiler = sctx->compiler; shader->compiler_ctx_state.compiler = sctx->compiler;
@ -3226,6 +3227,8 @@ current_not_ready:
assert(0); assert(0);
} }
shader1_key.ge.use_aco = ((struct si_shader_key_ge*)key)->use_aco;
simple_mtx_lock(&previous_stage_sel->mutex); simple_mtx_lock(&previous_stage_sel->mutex);
ok = si_check_missing_main_part(sscreen, previous_stage_sel, &shader->compiler_ctx_state, ok = si_check_missing_main_part(sscreen, previous_stage_sel, &shader->compiler_ctx_state,
&shader1_key, shader->wave_size); &shader1_key, shader->wave_size);
@ -3426,13 +3429,16 @@ static void si_init_shader_selector_async(void *job, void *gdata, int thread_ind
shader->is_monolithic = false; shader->is_monolithic = false;
si_parse_next_shader_property(&sel->info, &shader->key); si_parse_next_shader_property(&sel->info, &shader->key);
if (sel->stage <= MESA_SHADER_GEOMETRY && if (sel->stage <= MESA_SHADER_GEOMETRY) {
sscreen->use_ngg && (!sel->info.enabled_streamout_buffer_mask || if (sscreen->use_ngg && (!sel->info.enabled_streamout_buffer_mask ||
sscreen->info.gfx_level >= GFX11) && sscreen->info.gfx_level >= GFX11) &&
((sel->stage == MESA_SHADER_VERTEX && !shader->key.ge.as_ls) || ((sel->stage == MESA_SHADER_VERTEX && !shader->key.ge.as_ls) ||
sel->stage == MESA_SHADER_TESS_EVAL || sel->stage == MESA_SHADER_GEOMETRY)) sel->stage == MESA_SHADER_TESS_EVAL || sel->stage == MESA_SHADER_GEOMETRY))
shader->key.ge.as_ngg = 1; shader->key.ge.as_ngg = 1;
shader->key.ge.use_aco = sel->nir->info.use_aco_amd;
}
shader->wave_size = si_determine_wave_size(sscreen, shader); shader->wave_size = si_determine_wave_size(sscreen, shader);
if (sel->nir) { if (sel->nir) {
@ -3823,6 +3829,7 @@ static void si_bind_vs_shader(struct pipe_context *ctx, void *state)
sctx->shader.vs.cso = sel; sctx->shader.vs.cso = sel;
sctx->shader.vs.current = (sel && sel->variants_count) ? sel->variants[0] : NULL; sctx->shader.vs.current = (sel && sel->variants_count) ? sel->variants[0] : NULL;
sctx->shader.vs.key.ge.use_aco = sel ? sel->info.base.use_aco_amd : 0;
sctx->num_vs_blit_sgprs = sel ? sel->info.base.vs.blit_sgprs_amd : 0; sctx->num_vs_blit_sgprs = sel ? sel->info.base.vs.blit_sgprs_amd : 0;
sctx->vs_uses_draw_id = sel ? sel->info.uses_drawid : false; sctx->vs_uses_draw_id = sel ? sel->info.uses_drawid : false;
@ -3914,6 +3921,7 @@ static void si_bind_gs_shader(struct pipe_context *ctx, void *state)
sctx->shader.gs.cso = sel; sctx->shader.gs.cso = sel;
sctx->shader.gs.current = (sel && sel->variants_count) ? sel->variants[0] : NULL; sctx->shader.gs.current = (sel && sel->variants_count) ? sel->variants[0] : NULL;
sctx->shader.gs.key.ge.use_aco = sel ? sel->info.base.use_aco_amd : 0;
sctx->ia_multi_vgt_param_key.u.uses_gs = sel != NULL; sctx->ia_multi_vgt_param_key.u.uses_gs = sel != NULL;
si_update_common_shader_state(sctx, sel, PIPE_SHADER_GEOMETRY); si_update_common_shader_state(sctx, sel, PIPE_SHADER_GEOMETRY);
@ -3945,6 +3953,7 @@ static void si_bind_tcs_shader(struct pipe_context *ctx, void *state)
sctx->shader.tcs.cso = sel; sctx->shader.tcs.cso = sel;
sctx->shader.tcs.current = (sel && sel->variants_count) ? sel->variants[0] : NULL; sctx->shader.tcs.current = (sel && sel->variants_count) ? sel->variants[0] : NULL;
sctx->shader.tcs.key.ge.use_aco = sel ? sel->info.base.use_aco_amd : 0;
si_update_tess_uses_prim_id(sctx); si_update_tess_uses_prim_id(sctx);
si_update_tess_in_out_patch_vertices(sctx); si_update_tess_in_out_patch_vertices(sctx);
@ -3967,6 +3976,7 @@ static void si_bind_tes_shader(struct pipe_context *ctx, void *state)
sctx->shader.tes.cso = sel; sctx->shader.tes.cso = sel;
sctx->shader.tes.current = (sel && sel->variants_count) ? sel->variants[0] : NULL; sctx->shader.tes.current = (sel && sel->variants_count) ? sel->variants[0] : NULL;
sctx->shader.tes.key.ge.use_aco = sel ? sel->info.base.use_aco_amd : 0;
sctx->ia_multi_vgt_param_key.u.uses_tess = sel != NULL; sctx->ia_multi_vgt_param_key.u.uses_tess = sel != NULL;
si_update_tess_uses_prim_id(sctx); si_update_tess_uses_prim_id(sctx);