diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c index 8402ee4727a..7b5779ea306 100644 --- a/src/gallium/drivers/radeonsi/si_shader.c +++ b/src/gallium/drivers/radeonsi/si_shader.c @@ -711,16 +711,20 @@ void si_init_shader_args(struct si_shader *shader, struct si_shader_args *args) num_prolog_vgprs += num_color_elements; } - /* Outputs for the epilog. */ - num_return_sgprs = SI_SGPR_ALPHA_REF + 1; - num_returns = num_return_sgprs + util_bitcount(shader->selector->info.colors_written) * 4 + - shader->selector->info.writes_z + shader->selector->info.writes_stencil + - shader->selector->info.writes_samplemask + 1 /* SampleMaskIn */; + /* A monolithic PS emits its epilog in NIR directly. */ + if (!shader->is_monolithic) { + /* Outputs for the epilog. */ + num_return_sgprs = SI_SGPR_ALPHA_REF + 1; + num_returns = + num_return_sgprs + util_bitcount(shader->selector->info.colors_written) * 4 + + shader->selector->info.writes_z + shader->selector->info.writes_stencil + + shader->selector->info.writes_samplemask + 1 /* SampleMaskIn */; - for (i = 0; i < num_return_sgprs; i++) - ac_add_return(&args->ac, AC_ARG_SGPR); - for (; i < num_returns; i++) - ac_add_return(&args->ac, AC_ARG_VGPR); + for (i = 0; i < num_return_sgprs; i++) + ac_add_return(&args->ac, AC_ARG_SGPR); + for (; i < num_returns; i++) + ac_add_return(&args->ac, AC_ARG_VGPR); + } break; case MESA_SHADER_COMPUTE: @@ -1999,6 +2003,23 @@ struct nir_shader *si_get_nir_shader(struct si_shader *shader, } } else if (is_legacy_gs) { NIR_PASS_V(nir, ac_nir_lower_legacy_gs, false, sel->screen->use_ngg, output_info); + } else if (sel->stage == MESA_SHADER_FRAGMENT && shader->is_monolithic) { + ac_nir_lower_ps_options options = { + .gfx_level = sel->screen->info.gfx_level, + .family = sel->screen->info.family, + .uses_discard = si_shader_uses_discard(shader), + .alpha_to_coverage_via_mrtz = key->ps.part.epilog.alpha_to_coverage_via_mrtz, + .dual_src_blend_swizzle = 
key->ps.part.epilog.dual_src_blend_swizzle, + .spi_shader_col_format = key->ps.part.epilog.spi_shader_col_format, + .color_is_int8 = key->ps.part.epilog.color_is_int8, + .color_is_int10 = key->ps.part.epilog.color_is_int10, + .clamp_color = key->ps.part.epilog.clamp_color, + .alpha_to_one = key->ps.part.epilog.alpha_to_one, + .alpha_func = key->ps.part.epilog.alpha_func, + .broadcast_last_cbuf = key->ps.part.epilog.last_cbuf, + }; + + NIR_PASS_V(nir, ac_nir_lower_ps, &options); } NIR_PASS(progress2, nir, si_nir_lower_abi, shader, args); diff --git a/src/gallium/drivers/radeonsi/si_shader_llvm.c b/src/gallium/drivers/radeonsi/si_shader_llvm.c index 685e152c2d9..304d626ca0c 100644 --- a/src/gallium/drivers/radeonsi/si_shader_llvm.c +++ b/src/gallium/drivers/radeonsi/si_shader_llvm.c @@ -1050,7 +1050,8 @@ static bool si_llvm_translate_nir(struct si_shader_context *ctx, struct si_shade break; case MESA_SHADER_FRAGMENT: - si_llvm_ps_build_end(ctx); + if (!shader->is_monolithic) + si_llvm_ps_build_end(ctx); break; default: diff --git a/src/gallium/drivers/radeonsi/si_shader_llvm_ps.c b/src/gallium/drivers/radeonsi/si_shader_llvm_ps.c index 41cb49a33d4..d6e0c7e3cb1 100644 --- a/src/gallium/drivers/radeonsi/si_shader_llvm_ps.c +++ b/src/gallium/drivers/radeonsi/si_shader_llvm_ps.c @@ -850,30 +850,26 @@ void si_llvm_build_ps_epilog(struct si_shader_context *ctx, union si_shader_part void si_llvm_build_monolithic_ps(struct si_shader_context *ctx, struct si_shader *shader) { - struct ac_llvm_pointer parts[3]; - unsigned num_parts = 0, main_index; + union si_shader_part_key prolog_key; + si_get_ps_prolog_key(shader, &prolog_key, false); + + /* If no prolog is needed, we only have the main part, so no wrapper function is needed. */ + if (!si_need_ps_prolog(&prolog_key)) + return; + struct ac_llvm_pointer main_fn = ctx->main_fn; + /* Preserve main arguments. 
*/ enum ac_arg_type main_arg_types[AC_MAX_ARGS]; for (int i = 0; i < ctx->args->ac.arg_count; i++) main_arg_types[i] = ctx->args->ac.args[i].type; + si_llvm_build_ps_prolog(ctx, &prolog_key, false); - union si_shader_part_key prolog_key; - si_get_ps_prolog_key(shader, &prolog_key, false); + struct ac_llvm_pointer parts[2] = { + ctx->main_fn, /* prolog */ + main_fn, /* main */ + }; - if (si_need_ps_prolog(&prolog_key)) { - si_llvm_build_ps_prolog(ctx, &prolog_key, false); - parts[num_parts++] = ctx->main_fn; - } - - main_index = num_parts; - parts[num_parts++] = main_fn; - - union si_shader_part_key epilog_key; - si_get_ps_epilog_key(shader, &epilog_key); - si_llvm_build_ps_epilog(ctx, &epilog_key, false); - parts[num_parts++] = ctx->main_fn; - - si_build_wrapper_function(ctx, parts, num_parts, main_index, 0, main_arg_types, false); + si_build_wrapper_function(ctx, parts, 2, 1, 0, main_arg_types, false); }