diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c index b19017beb7e..8f7d640dbcc 100644 --- a/src/gallium/drivers/radeonsi/si_shader.c +++ b/src/gallium/drivers/radeonsi/si_shader.c @@ -769,6 +769,7 @@ static bool si_shader_binary_open(struct si_screen *screen, struct si_shader *sh #define add_part(shader_or_part) \ if (shader_or_part) { \ + assert((shader_or_part)->binary.type == SI_SHADER_BINARY_ELF); \ part_elfs[num_parts] = (shader_or_part)->binary.code_buffer; \ part_sizes[num_parts] = (shader_or_part)->binary.code_size; \ num_parts++; \ @@ -2609,6 +2610,11 @@ static void get_input_nir(struct si_shader *shader, struct si_nir_shader_ctx *ct ctx->nir = sel->nir ? sel->nir : (sel->nir_binary ? si_deserialize_shader(sel) : NULL); assert(ctx->nir); + if (sel->stage <= MESA_SHADER_GEOMETRY) + ctx->nir->info.use_aco_amd = shader->key.ge.use_aco; + + assert(ctx->nir->info.use_aco_amd == si_shader_uses_aco(shader)); + if (unlikely(should_print_nir(ctx->nir))) { /* Modify the shader's name so that each variant gets its own name. */ ctx->nir->info.name = ralloc_asprintf(ctx->nir, "%s-%08x", ctx->nir->info.name, @@ -2632,6 +2638,7 @@ static void get_prev_stage_input_nir(struct si_shader *shader, struct si_linked_ linked->producer_shader.key.ge.as_es = 1; linked->producer_shader.key.ge.as_ngg = key->ge.as_ngg; } + linked->producer_shader.key.ge.use_aco = key->ge.use_aco; linked->producer_shader.next_shader = shader; linked->producer_shader.key.ge.mono = key->ge.mono; @@ -2723,7 +2730,7 @@ static void si_get_shader_variant_info(struct si_shader *shader, nir_shader *nir) { nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir)); - assert(shader->selector->info.base.use_aco_amd == nir->info.use_aco_amd); + assert(nir->info.use_aco_amd == si_shader_uses_aco(shader)); const BITSET_WORD *sysvals = nir->info.system_values_read; /* ACO needs spi_ps_input_ena before si_init_shader_args. 
*/ @@ -2899,7 +2906,11 @@ static void get_nir_shaders(struct si_shader *shader, struct si_linked_shaders * for (unsigned i = 0; i < SI_NUM_LINKED_SHADERS; i++) { if (linked->shader[i].nir) { struct si_shader_info info; + + /* Save and restore use_aco_amd because si_nir_scan_shader changes it. */ + bool use_aco_amd = linked->shader[i].nir->info.use_aco_amd; si_nir_scan_shader(shader->selector->screen, linked->shader[i].nir, &info, true); + linked->shader[i].nir->info.use_aco_amd = use_aco_amd; shader->info.uses_vmem_load_other |= info.uses_vmem_load_other; shader->info.uses_vmem_sampler_or_bvh |= info.uses_vmem_sampler_or_bvh; @@ -3089,6 +3100,7 @@ bool si_compile_shader(struct si_screen *sscreen, struct ac_llvm_compiler *compi FLOAT_CONTROLS_DENORM_FLUSH_TO_ZERO_FP64)) float_mode &= ~V_00B028_FP_16_64_DENORMS; + assert(nir->info.use_aco_amd == si_shader_uses_aco(shader)); ret = #if AMD_LLVM_AVAILABLE !nir->info.use_aco_amd ? si_llvm_compile_shader(sscreen, compiler, shader, &linked, debug) : @@ -3286,7 +3298,10 @@ static bool si_shader_select_tcs_parts(struct si_screen *sscreen, struct ac_llvm if (sscreen->info.gfx_level >= GFX9) { assert(shader->wave_size == 32 || shader->wave_size == 64); unsigned wave_size_index = shader->wave_size == 64; - shader->previous_stage = shader->key.ge.part.tcs.ls->main_parts.named.ls[wave_size_index]; + shader->previous_stage = + shader->key.ge.part.tcs.ls->main_parts.named.ls[wave_size_index][shader->key.ge.use_aco]; + assert(shader->previous_stage->key.ge.use_aco == si_shader_uses_aco(shader)); + assert((shader->previous_stage->binary.type == SI_SHADER_BINARY_RAW) == si_shader_uses_aco(shader)); } return true; @@ -3302,10 +3317,13 @@ static bool si_shader_select_gs_parts(struct si_screen *sscreen, struct ac_llvm_ if (shader->key.ge.as_ngg) { assert(shader->wave_size == 32 || shader->wave_size == 64); unsigned wave_size_index = shader->wave_size == 64; - shader->previous_stage = 
shader->key.ge.part.gs.es->main_parts.named.ngg_es[wave_size_index]; + shader->previous_stage = + shader->key.ge.part.gs.es->main_parts.named.ngg_es[wave_size_index][shader->key.ge.use_aco]; } else { - shader->previous_stage = shader->key.ge.part.gs.es->main_parts.named.es; + shader->previous_stage = shader->key.ge.part.gs.es->main_parts.named.es[shader->key.ge.use_aco]; } + assert(shader->previous_stage->key.ge.use_aco == si_shader_uses_aco(shader)); + assert((shader->previous_stage->binary.type == SI_SHADER_BINARY_RAW) == si_shader_uses_aco(shader)); } return true; diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h index 8e3274fcad2..03777ec7f5d 100644 --- a/src/gallium/drivers/radeonsi/si_shader.h +++ b/src/gallium/drivers/radeonsi/si_shader.h @@ -589,12 +589,12 @@ struct si_shader_info { union si_main_shader_parts { struct si_main_shader_parts_named { - /* indices: [wave_size == 64] */ - struct si_shader *other[2]; - struct si_shader *ls[2]; /* as_ls is set in the key */ - struct si_shader *es; /* as_es && !as_ngg in the key */ - struct si_shader *ngg[2]; /* !as_es && as_ngg in the key */ - struct si_shader *ngg_es[2]; /* as_es && as_ngg in the key */ + /* indices: [wave_size == 64][use_aco] */ + struct si_shader *other[2][2]; + struct si_shader *ls[2][2]; /* as_ls is set in the key */ + struct si_shader *es[2]; /* as_es && !as_ngg in the key, always wave64 */ + struct si_shader *ngg[2][2]; /* !as_es && as_ngg in the key */ + struct si_shader *ngg_es[2][2]; /* as_es && as_ngg in the key */ } named; struct si_shader *variants[sizeof(struct si_main_shader_parts_named) / sizeof(struct si_shader*)]; }; @@ -750,6 +750,7 @@ struct si_shader_key_ge { unsigned as_ls : 1; /* whether it's VS before TCS */ unsigned as_ngg : 1; /* whether it's the last GE stage and NGG is enabled, also set for the stage right before GS */ + unsigned use_aco : 1; /* whether the shader variant is using ACO */ /* Flags for monolithic compilation 
only. */ struct { @@ -1118,18 +1119,19 @@ static inline struct si_shader **si_get_main_shader_part(struct si_shader_select if (sel->stage <= MESA_SHADER_GEOMETRY) { if (key->ge.as_ls) - return &sel->main_parts.named.ls[wave_size_index]; + return &sel->main_parts.named.ls[wave_size_index][key->ge.use_aco]; if (key->ge.as_es && key->ge.as_ngg) - return &sel->main_parts.named.ngg_es[wave_size_index]; + return &sel->main_parts.named.ngg_es[wave_size_index][key->ge.use_aco]; if (key->ge.as_es) { /* legacy GS only support wave 64 */ assert(wave_size == 64); - return &sel->main_parts.named.es; + return &sel->main_parts.named.es[key->ge.use_aco]; } if (key->ge.as_ngg) - return &sel->main_parts.named.ngg[wave_size_index]; + return &sel->main_parts.named.ngg[wave_size_index][key->ge.use_aco]; + return &sel->main_parts.named.other[wave_size_index][key->ge.use_aco]; } - return &sel->main_parts.named.other[wave_size_index]; + return &sel->main_parts.named.other[wave_size_index][sel->info.base.use_aco_amd]; } static inline bool gfx10_has_variable_edgeflags(struct si_shader *shader) @@ -1175,6 +1177,12 @@ static inline bool si_shader_culling_enabled(struct si_shader *shader) (output_prim == MESA_PRIM_TRIANGLES || output_prim == MESA_PRIM_LINES); } +static inline bool si_shader_uses_aco(struct si_shader *shader) +{ + return shader->selector->stage <= MESA_SHADER_GEOMETRY ? 
+ shader->key.ge.use_aco : shader->selector->info.base.use_aco_amd; +} + #ifdef __cplusplus } #endif diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.cpp b/src/gallium/drivers/radeonsi/si_state_shaders.cpp index 046b6ba3dee..9a6bb0fa1fd 100644 --- a/src/gallium/drivers/radeonsi/si_state_shaders.cpp +++ b/src/gallium/drivers/radeonsi/si_state_shaders.cpp @@ -2957,7 +2957,7 @@ static void si_build_shader_variant(struct si_shader *shader, int thread_index, compiler = &shader->compiler_ctx_state.compiler; } - if (!sel->info.base.use_aco_amd && !*compiler) + if (!si_shader_uses_aco(shader) && !*compiler) *compiler = si_create_llvm_compiler(sscreen); if (unlikely(!si_create_shader_variant(sscreen, *compiler, shader, debug))) { @@ -3011,6 +3011,7 @@ static bool si_check_missing_main_part(struct si_screen *sscreen, struct si_shad main_part->key.ge.as_es = key->ge.as_es; main_part->key.ge.as_ls = key->ge.as_ls; main_part->key.ge.as_ngg = key->ge.as_ngg; + main_part->key.ge.use_aco = key->ge.use_aco; } main_part->is_monolithic = false; main_part->wave_size = wave_size; @@ -3172,11 +3173,11 @@ current_not_ready: } util_queue_fence_init(&shader->ready); + shader->selector = sel; + *((SHADER_KEY_TYPE*)&shader->key) = *key; - if (!sel->info.base.use_aco_amd && !sctx->compiler) + if (!si_shader_uses_aco(shader) && !sctx->compiler) sctx->compiler = si_create_llvm_compiler(sctx->screen); - shader->selector = sel; - *((SHADER_KEY_TYPE*)&shader->key) = *key; shader->wave_size = si_determine_wave_size(sscreen, shader); shader->compiler_ctx_state.compiler = sctx->compiler; @@ -3226,6 +3227,8 @@ current_not_ready: assert(0); } + shader1_key.ge.use_aco = ((struct si_shader_key_ge*)key)->use_aco; + simple_mtx_lock(&previous_stage_sel->mutex); ok = si_check_missing_main_part(sscreen, previous_stage_sel, &shader->compiler_ctx_state, &shader1_key, shader->wave_size); @@ -3426,12 +3429,15 @@ static void si_init_shader_selector_async(void *job, void *gdata, int thread_ind
si_parse_next_shader_property(&sel->info, &shader->key); - if (sel->stage <= MESA_SHADER_GEOMETRY && - sscreen->use_ngg && (!sel->info.enabled_streamout_buffer_mask || - sscreen->info.gfx_level >= GFX11) && - ((sel->stage == MESA_SHADER_VERTEX && !shader->key.ge.as_ls) || - sel->stage == MESA_SHADER_TESS_EVAL || sel->stage == MESA_SHADER_GEOMETRY)) - shader->key.ge.as_ngg = 1; + if (sel->stage <= MESA_SHADER_GEOMETRY) { + if (sscreen->use_ngg && (!sel->info.enabled_streamout_buffer_mask || + sscreen->info.gfx_level >= GFX11) && + ((sel->stage == MESA_SHADER_VERTEX && !shader->key.ge.as_ls) || + sel->stage == MESA_SHADER_TESS_EVAL || sel->stage == MESA_SHADER_GEOMETRY)) + shader->key.ge.as_ngg = 1; + + shader->key.ge.use_aco = sel->nir->info.use_aco_amd; + } shader->wave_size = si_determine_wave_size(sscreen, shader); @@ -3823,6 +3829,7 @@ static void si_bind_vs_shader(struct pipe_context *ctx, void *state) sctx->shader.vs.cso = sel; sctx->shader.vs.current = (sel && sel->variants_count) ? sel->variants[0] : NULL; + sctx->shader.vs.key.ge.use_aco = sel ? sel->info.base.use_aco_amd : 0; sctx->num_vs_blit_sgprs = sel ? sel->info.base.vs.blit_sgprs_amd : 0; sctx->vs_uses_draw_id = sel ? sel->info.uses_drawid : false; @@ -3914,6 +3921,7 @@ static void si_bind_gs_shader(struct pipe_context *ctx, void *state) sctx->shader.gs.cso = sel; sctx->shader.gs.current = (sel && sel->variants_count) ? sel->variants[0] : NULL; + sctx->shader.gs.key.ge.use_aco = sel ? sel->info.base.use_aco_amd : 0; sctx->ia_multi_vgt_param_key.u.uses_gs = sel != NULL; si_update_common_shader_state(sctx, sel, PIPE_SHADER_GEOMETRY); @@ -3945,6 +3953,7 @@ static void si_bind_tcs_shader(struct pipe_context *ctx, void *state) sctx->shader.tcs.cso = sel; sctx->shader.tcs.current = (sel && sel->variants_count) ? sel->variants[0] : NULL; + sctx->shader.tcs.key.ge.use_aco = sel ? 
sel->info.base.use_aco_amd : 0; si_update_tess_uses_prim_id(sctx); si_update_tess_in_out_patch_vertices(sctx); @@ -3967,6 +3976,7 @@ static void si_bind_tes_shader(struct pipe_context *ctx, void *state) sctx->shader.tes.cso = sel; sctx->shader.tes.current = (sel && sel->variants_count) ? sel->variants[0] : NULL; + sctx->shader.tes.key.ge.use_aco = sel ? sel->info.base.use_aco_amd : 0; sctx->ia_multi_vgt_param_key.u.uses_tess = sel != NULL; si_update_tess_uses_prim_id(sctx);