diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c index 518caf9e29d..894e9676e53 100644 --- a/src/gallium/drivers/radeonsi/si_shader.c +++ b/src/gallium/drivers/radeonsi/si_shader.c @@ -456,13 +456,18 @@ void si_init_shader_args(struct si_shader *shader, struct si_shader_args *args) ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.tcs_patch_id); ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.tcs_rel_ids); - /* param_tcs_offchip_offset and param_tcs_factor_offset are - * placed after the user SGPRs. + /* For monolithic shaders, the TCS epilog code is generated by + * ac_nir_lower_hs_outputs_to_mem. */ - for (i = 0; i < GFX6_TCS_NUM_USER_SGPR + 2; i++) - ac_add_return(&args->ac, AC_ARG_SGPR); - for (i = 0; i < 11; i++) - ac_add_return(&args->ac, AC_ARG_VGPR); + if (!shader->is_monolithic) { + /* param_tcs_offchip_offset and param_tcs_factor_offset are + * placed after the user SGPRs. + */ + for (i = 0; i < GFX6_TCS_NUM_USER_SGPR + 2; i++) + ac_add_return(&args->ac, AC_ARG_SGPR); + for (i = 0; i < 11; i++) + ac_add_return(&args->ac, AC_ARG_VGPR); + } break; case SI_SHADER_MERGED_VERTEX_TESSCTRL: @@ -520,16 +525,21 @@ void si_init_shader_args(struct si_shader *shader, struct si_shader_args *args) ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_FLOAT, NULL); } - /* TCS return values are inputs to the TCS epilog. - * - * param_tcs_offchip_offset, param_tcs_factor_offset, - * param_tcs_offchip_layout, and internal_bindings - * should be passed to the epilog. + /* For monolithic shaders, the TCS epilog code is generated by + * ac_nir_lower_hs_outputs_to_mem. */ - for (i = 0; i <= 8 + GFX9_SGPR_TCS_OUT_LAYOUT; i++) - ac_add_return(&args->ac, AC_ARG_SGPR); - for (i = 0; i < 11; i++) - ac_add_return(&args->ac, AC_ARG_VGPR); + if (!shader->is_monolithic) { + /* TCS return values are inputs to the TCS epilog. + * + * param_tcs_offchip_offset, param_tcs_factor_offset, + * param_tcs_offchip_layout, and internal_bindings + * should be passed to the epilog. + */ + for (i = 0; i <= 8 + GFX9_SGPR_TCS_OUT_LAYOUT; i++) + ac_add_return(&args->ac, AC_ARG_SGPR); + for (i = 0; i < 11; i++) + ac_add_return(&args->ac, AC_ARG_VGPR); + } } break; @@ -1587,9 +1597,14 @@ static bool si_lower_io_to_mem(struct si_shader *shader, nir_shader *nir, } else if (nir->info.stage == MESA_SHADER_TESS_CTRL) { NIR_PASS_V(nir, ac_nir_lower_hs_inputs_to_mem, si_map_io_driver_location, key->ge.opt.same_patch_vertices); + + /* Used by hs_emit_write_tess_factors() when monolithic shader. */ + nir->info.tess._primitive_mode = key->ge.part.tcs.epilog.prim_mode; + NIR_PASS_V(nir, ac_nir_lower_hs_outputs_to_mem, si_map_io_driver_location, sel->screen->info.gfx_level, - false, /* does not matter as we disabled final tess factor write */ + /* Used by hs_emit_write_tess_factors() when monolithic shader. */ + key->ge.part.tcs.epilog.tes_reads_tess_factors, ~0ULL, ~0ULL, /* no TES inputs filter */ util_last_bit64(sel->info.outputs_written), util_last_bit64(sel->info.patch_outputs_written), @@ -1597,7 +1612,9 @@ static bool si_lower_io_to_mem(struct si_shader *shader, nir_shader *nir, /* ALL TCS inputs are passed by register. */ key->ge.opt.same_patch_vertices && !(sel->info.base.inputs_read & ~sel->info.tcs_vgpr_only_inputs), - sel->info.tessfactors_are_def_in_all_invocs, false); + sel->info.tessfactors_are_def_in_all_invocs, + /* Emit epilog only when monolithic shader. */ + shader->is_monolithic); return true; } else if (nir->info.stage == MESA_SHADER_TESS_EVAL) { NIR_PASS_V(nir, ac_nir_lower_tes_inputs_to_mem, si_map_io_driver_location); diff --git a/src/gallium/drivers/radeonsi/si_shader_llvm.c b/src/gallium/drivers/radeonsi/si_shader_llvm.c index 603fa9132f4..685e152c2d9 100644 --- a/src/gallium/drivers/radeonsi/si_shader_llvm.c +++ b/src/gallium/drivers/radeonsi/si_shader_llvm.c @@ -1035,7 +1035,8 @@ static bool si_llvm_translate_nir(struct si_shader_context *ctx, struct si_shade break; case MESA_SHADER_TESS_CTRL: - si_llvm_tcs_build_end(ctx); + if (!shader->is_monolithic) + si_llvm_tcs_build_end(ctx); break; case MESA_SHADER_TESS_EVAL: @@ -1122,19 +1123,13 @@ bool si_llvm_compile_shader(struct si_screen *sscreen, struct ac_llvm_compiler * if (sscreen->info.gfx_level >= GFX9) { struct si_shader_selector *ls = shader->key.ge.part.tcs.ls; - struct ac_llvm_pointer parts[4]; + struct ac_llvm_pointer parts[3]; bool vs_needs_prolog = si_vs_needs_prolog(ls, &shader->key.ge.part.tcs.ls_prolog); /* TCS main part */ parts[2] = ctx.main_fn; - /* TCS epilog */ - union si_shader_part_key tcs_epilog_key; - si_get_tcs_epilog_key(shader, &tcs_epilog_key); - si_llvm_build_tcs_epilog(&ctx, &tcs_epilog_key, false); - parts[3] = ctx.main_fn; - struct si_shader shader_ls = {}; shader_ls.selector = ls; shader_ls.key.ge.part.vs.prolog = shader->key.ge.part.tcs.ls_prolog; @@ -1176,26 +1171,10 @@ bool si_llvm_compile_shader(struct si_screen *sscreen, struct ac_llvm_compiler * ctx.shader = shader; ctx.stage = MESA_SHADER_TESS_CTRL; - si_build_wrapper_function(&ctx, parts + !vs_needs_prolog, 4 - !vs_needs_prolog, + si_build_wrapper_function(&ctx, parts + !vs_needs_prolog, 3 - !vs_needs_prolog, vs_needs_prolog, vs_needs_prolog ? 2 : 1, main_arg_types, shader->key.ge.opt.same_patch_vertices); - } else { - struct ac_llvm_pointer parts[2]; - union si_shader_part_key epilog_key; - - parts[0] = ctx.main_fn; - - for (int i = 0; i < ctx.args->ac.arg_count; i++) - main_arg_types[i] = ctx.args->ac.args[i].type; - main_arg_types[MIN2(AC_MAX_ARGS - 1, ctx.args->ac.arg_count)] = AC_ARG_INVALID; - - memset(&epilog_key, 0, sizeof(epilog_key)); - epilog_key.tcs_epilog.states = shader->key.ge.part.tcs.epilog; - si_llvm_build_tcs_epilog(&ctx, &epilog_key, false); - parts[1] = ctx.main_fn; - - si_build_wrapper_function(&ctx, parts, 2, 0, 0, main_arg_types, false); } } else if (shader->is_monolithic && sel->stage == MESA_SHADER_GEOMETRY) { if (ctx.screen->info.gfx_level >= GFX9) { diff --git a/src/gallium/drivers/radeonsi/si_shader_llvm_tess.c b/src/gallium/drivers/radeonsi/si_shader_llvm_tess.c index 00cf40b311a..b352ee569c6 100644 --- a/src/gallium/drivers/radeonsi/si_shader_llvm_tess.c +++ b/src/gallium/drivers/radeonsi/si_shader_llvm_tess.c @@ -430,7 +430,7 @@ void si_llvm_tcs_build_end(struct si_shader_context *ctx) invocation_id = si_unpack_param(ctx, ctx->args->ac.tcs_rel_ids, 8, 5); tf_lds_offset = get_tcs_out_current_patch_data_offset(ctx); - if (ctx->screen->info.gfx_level >= GFX9 && !ctx->shader->is_monolithic) { + if (ctx->screen->info.gfx_level >= GFX9) { LLVMBasicBlockRef blocks[2] = {LLVMGetInsertBlock(builder), ctx->merged_wrap_if_entry_block}; LLVMValueRef values[2];