diff --git a/src/gallium/drivers/radeonsi/gfx10_shader_ngg.c b/src/gallium/drivers/radeonsi/gfx10_shader_ngg.c index 07879ba9b89..5db01034852 100644 --- a/src/gallium/drivers/radeonsi/gfx10_shader_ngg.c +++ b/src/gallium/drivers/radeonsi/gfx10_shader_ngg.c @@ -925,14 +925,13 @@ static void cull_primitive(struct si_shader_context *ctx, * Also return the position, which is passed to the shader as an input, * so that we don't compute it twice. */ -void gfx10_emit_ngg_culling_epilogue(struct ac_shader_abi *abi) +void gfx10_ngg_culling_build_end(struct si_shader_context *ctx) { - struct si_shader_context *ctx = si_shader_context_from_abi(abi); struct si_shader *shader = ctx->shader; struct si_shader_selector *sel = shader->selector; struct si_shader_info *info = &sel->info; LLVMBuilderRef builder = ctx->ac.builder; - LLVMValueRef *addrs = abi->outputs; + LLVMValueRef *addrs = ctx->abi.outputs; unsigned max_waves = DIV_ROUND_UP(ctx->screen->ngg_subgroup_size, ctx->ac.wave_size); assert(shader->key.ge.opt.ngg_culling); @@ -1425,16 +1424,15 @@ void gfx10_emit_ngg_culling_epilogue(struct ac_shader_abi *abi) } /** - * Emit the epilogue of an API VS or TES shader compiled as ESGS shader. + * Emit the end of an API VS or TES shader compiled as ESGS shader. */ -void gfx10_emit_ngg_epilogue(struct ac_shader_abi *abi) +void gfx10_ngg_build_end(struct si_shader_context *ctx) { - struct si_shader_context *ctx = si_shader_context_from_abi(abi); struct si_shader_selector *sel = ctx->shader->selector; struct si_shader_info *info = &sel->info; struct si_shader_output_values outputs[PIPE_MAX_SHADER_OUTPUTS]; LLVMBuilderRef builder = ctx->ac.builder; - LLVMValueRef *addrs = abi->outputs; + LLVMValueRef *addrs = ctx->abi.outputs; LLVMValueRef tmp, tmp2; assert(!ctx->shader->is_gs_copy_shader); @@ -1684,7 +1682,7 @@ static LLVMValueRef ngg_gs_get_vertex_storage(struct si_shader_context *ctx) /** * Return a pointer to the LDS storage reserved for the N'th vertex, where N * is in emit order; that is: - * - during the epilogue, N is the threadidx (relative to the entire threadgroup) + * - at the shader end, N is the threadidx (relative to the entire threadgroup) * - during vertex emit, i.e. while the API GS shader invocation is running, * N = threadidx * gs.vertices_out + emitidx * @@ -1881,7 +1879,7 @@ void gfx10_ngg_gs_emit_prologue(struct si_shader_context *ctx) ac_build_s_barrier(&ctx->ac, ctx->stage); } -void gfx10_ngg_gs_emit_epilogue(struct si_shader_context *ctx) +void gfx10_ngg_gs_build_end(struct si_shader_context *ctx) { const struct si_shader_selector *sel = ctx->shader->selector; const struct si_shader_info *info = &sel->info; diff --git a/src/gallium/drivers/radeonsi/si_shader_internal.h b/src/gallium/drivers/radeonsi/si_shader_internal.h index f5e0b973b19..46a9f1f4cf0 100644 --- a/src/gallium/drivers/radeonsi/si_shader_internal.h +++ b/src/gallium/drivers/radeonsi/si_shader_internal.h @@ -190,11 +190,11 @@ bool gfx10_ngg_export_prim_early(struct si_shader *shader); void gfx10_ngg_build_sendmsg_gs_alloc_req(struct si_shader_context *ctx); void gfx10_ngg_build_export_prim(struct si_shader_context *ctx, LLVMValueRef user_edgeflags[3], LLVMValueRef prim_passthrough); -void gfx10_emit_ngg_culling_epilogue(struct ac_shader_abi *abi); -void gfx10_emit_ngg_epilogue(struct ac_shader_abi *abi); +void gfx10_ngg_culling_build_end(struct si_shader_context *ctx); +void gfx10_ngg_build_end(struct si_shader_context *ctx); void gfx10_ngg_gs_emit_vertex(struct si_shader_context *ctx, unsigned stream, LLVMValueRef *addrs); void gfx10_ngg_gs_emit_prologue(struct si_shader_context *ctx); -void gfx10_ngg_gs_emit_epilogue(struct si_shader_context *ctx); +void gfx10_ngg_gs_build_end(struct si_shader_context *ctx); unsigned gfx10_ngg_get_scratch_dw_size(struct si_shader *shader); bool gfx10_ngg_calculate_subgroup_info(struct si_shader *shader); @@ -237,17 +237,17 @@ bool si_llvm_compile_shader(struct si_screen *sscreen, struct ac_llvm_compiler * /* si_shader_llvm_gs.c */ LLVMValueRef si_is_es_thread(struct si_shader_context *ctx); LLVMValueRef si_is_gs_thread(struct si_shader_context *ctx); -void si_llvm_emit_es_epilogue(struct ac_shader_abi *abi); +void si_llvm_es_build_end(struct si_shader_context *ctx); void si_preload_esgs_ring(struct si_shader_context *ctx); void si_preload_gs_rings(struct si_shader_context *ctx); -void si_llvm_emit_gs_epilogue(struct ac_shader_abi *abi); +void si_llvm_gs_build_end(struct si_shader_context *ctx); void si_llvm_init_gs_callbacks(struct si_shader_context *ctx); /* si_shader_llvm_tess.c */ void si_llvm_preload_tes_rings(struct si_shader_context *ctx); -void si_llvm_emit_ls_epilogue(struct ac_shader_abi *abi); +void si_llvm_ls_build_end(struct si_shader_context *ctx); void si_llvm_build_tcs_epilog(struct si_shader_context *ctx, union si_shader_part_key *key); -void si_llvm_emit_tcs_epilogue(struct ac_shader_abi *abi); +void si_llvm_tcs_build_end(struct si_shader_context *ctx); void si_llvm_init_tcs_callbacks(struct si_shader_context *ctx); void si_llvm_init_tes_callbacks(struct si_shader_context *ctx, bool ngg_cull_shader); @@ -256,7 +256,7 @@ LLVMValueRef si_get_sample_id(struct si_shader_context *ctx); void si_llvm_build_ps_prolog(struct si_shader_context *ctx, union si_shader_part_key *key); void si_llvm_build_ps_epilog(struct si_shader_context *ctx, union si_shader_part_key *key); void si_llvm_build_monolithic_ps(struct si_shader_context *ctx, struct si_shader *shader); -void si_llvm_return_fs_outputs(struct ac_shader_abi *abi); +void si_llvm_ps_build_end(struct si_shader_context *ctx); void si_llvm_init_ps_callbacks(struct si_shader_context *ctx); /* si_shader_llvm_resources.c */ @@ -273,7 +273,7 @@ void si_llvm_emit_streamout(struct si_shader_context *ctx, struct si_shader_outp unsigned noutput, unsigned stream); void si_llvm_build_vs_exports(struct si_shader_context *ctx, struct si_shader_output_values *outputs, unsigned noutput); -void si_llvm_emit_vs_epilogue(struct ac_shader_abi *abi); +void si_llvm_vs_build_end(struct si_shader_context *ctx); void si_llvm_build_vs_prolog(struct si_shader_context *ctx, union si_shader_part_key *key); void si_llvm_init_vs_callbacks(struct si_shader_context *ctx, bool ngg_cull_shader); diff --git a/src/gallium/drivers/radeonsi/si_shader_llvm.c b/src/gallium/drivers/radeonsi/si_shader_llvm.c index 53ec19b42e8..7f8da2c836a 100644 --- a/src/gallium/drivers/radeonsi/si_shader_llvm.c +++ b/src/gallium/drivers/radeonsi/si_shader_llvm.c @@ -901,11 +901,9 @@ bool si_llvm_translate_nir(struct si_shader_context *ctx, struct si_shader *shad * so that empty waves can jump directly to s_endpgm, * which will also signal the barrier. * - * This is possible in gfx9, because an empty wave - * for the second shader does not participate in - * the epilogue. With NGG, empty waves may still - * be required to export data (e.g. GS output vertices), - * so we cannot let them exit early. + * This is possible in gfx9, because an empty wave for the second shader does not insert + * any ending. With NGG, empty waves may still be required to export data (e.g. GS output + * vertices), so we cannot let them exit early. * * If the shader is TCS and the TCS epilog is present * and contains a barrier, it will wait there and then @@ -1032,38 +1030,41 @@ bool si_llvm_translate_nir(struct si_shader_context *ctx, struct si_shader *shad switch (sel->stage) { case MESA_SHADER_VERTEX: if (shader->key.ge.as_ls) - si_llvm_emit_ls_epilogue(&ctx->abi); + si_llvm_ls_build_end(ctx); else if (shader->key.ge.as_es) - si_llvm_emit_es_epilogue(&ctx->abi); + si_llvm_es_build_end(ctx); else if (ngg_cull_shader) - gfx10_emit_ngg_culling_epilogue(&ctx->abi); + gfx10_ngg_culling_build_end(ctx); else if (shader->key.ge.as_ngg) - gfx10_emit_ngg_epilogue(&ctx->abi); + gfx10_ngg_build_end(ctx); else - si_llvm_emit_vs_epilogue(&ctx->abi); + si_llvm_vs_build_end(ctx); break; case MESA_SHADER_TESS_CTRL: - si_llvm_emit_tcs_epilogue(&ctx->abi); + si_llvm_tcs_build_end(ctx); break; case MESA_SHADER_TESS_EVAL: if (ctx->shader->key.ge.as_es) - si_llvm_emit_es_epilogue(&ctx->abi); + si_llvm_es_build_end(ctx); else if (ngg_cull_shader) - gfx10_emit_ngg_culling_epilogue(&ctx->abi); + gfx10_ngg_culling_build_end(ctx); else if (ctx->shader->key.ge.as_ngg) - gfx10_emit_ngg_epilogue(&ctx->abi); + gfx10_ngg_build_end(ctx); else - si_llvm_emit_vs_epilogue(&ctx->abi); + si_llvm_vs_build_end(ctx); break; case MESA_SHADER_GEOMETRY: - si_llvm_emit_gs_epilogue(&ctx->abi); + if (ctx->shader->key.ge.as_ngg) + gfx10_ngg_gs_build_end(ctx); + else + si_llvm_gs_build_end(ctx); break; case MESA_SHADER_FRAGMENT: - si_llvm_return_fs_outputs(&ctx->abi); + si_llvm_ps_build_end(ctx); break; default: diff --git a/src/gallium/drivers/radeonsi/si_shader_llvm_gs.c b/src/gallium/drivers/radeonsi/si_shader_llvm_gs.c index 92b289cd199..19e09e481f4 100644 --- a/src/gallium/drivers/radeonsi/si_shader_llvm_gs.c +++ b/src/gallium/drivers/radeonsi/si_shader_llvm_gs.c @@ -135,12 +135,11 @@ static void si_set_es_return_value_for_gs(struct si_shader_context *ctx) ctx->return_value = ret; } -void si_llvm_emit_es_epilogue(struct ac_shader_abi *abi) +void si_llvm_es_build_end(struct si_shader_context *ctx) { - struct si_shader_context *ctx = si_shader_context_from_abi(abi); struct si_shader *es = ctx->shader; struct si_shader_info *info = &es->selector->info; - LLVMValueRef *addrs = abi->outputs; + LLVMValueRef *addrs = ctx->abi.outputs; LLVMValueRef lds_base = NULL; unsigned chan; int i; @@ -209,12 +208,11 @@ static LLVMValueRef ngg_get_emulated_counters_buf(struct si_shader_context *ctx) LLVMConstInt(ctx->ac.i32, SI_GS_QUERY_EMULATED_COUNTERS_BUF, false)); } -static void emit_gs_epilogue(struct si_shader_context *ctx) +void si_llvm_gs_build_end(struct si_shader_context *ctx) { - if (ctx->shader->key.ge.as_ngg) { - gfx10_ngg_gs_emit_epilogue(ctx); - return; - } + struct si_shader_info UNUSED *info = &ctx->shader->selector->info; + + assert(info->num_outputs <= AC_LLVM_MAX_OUTPUTS); if (ctx->screen->info.chip_class >= GFX10) ac_build_waitcnt(&ctx->ac, AC_WAIT_VSTORE); @@ -271,16 +269,6 @@ static void emit_gs_epilogue(struct si_shader_context *ctx) ac_build_endif(&ctx->ac, ctx->merged_wrap_if_label); } -void si_llvm_emit_gs_epilogue(struct ac_shader_abi *abi) -{ - struct si_shader_context *ctx = si_shader_context_from_abi(abi); - struct si_shader_info UNUSED *info = &ctx->shader->selector->info; - - assert(info->num_outputs <= AC_LLVM_MAX_OUTPUTS); - - emit_gs_epilogue(ctx); -} - /* Emit one vertex from the geometry shader */ static void si_llvm_emit_vertex(struct ac_shader_abi *abi, unsigned stream, LLVMValueRef *addrs) { diff --git a/src/gallium/drivers/radeonsi/si_shader_llvm_ps.c b/src/gallium/drivers/radeonsi/si_shader_llvm_ps.c index 5b9b50298af..7ab160c3bc4 100644 --- a/src/gallium/drivers/radeonsi/si_shader_llvm_ps.c +++ b/src/gallium/drivers/radeonsi/si_shader_llvm_ps.c @@ -470,14 +470,13 @@ static void si_export_mrt_color(struct si_shader_context *ctx, LLVMValueRef *col * * The alpha-ref SGPR is returned via its original location. */ -void si_llvm_return_fs_outputs(struct ac_shader_abi *abi) +void si_llvm_ps_build_end(struct si_shader_context *ctx) { - struct si_shader_context *ctx = si_shader_context_from_abi(abi); struct si_shader *shader = ctx->shader; struct si_shader_info *info = &shader->selector->info; LLVMBuilderRef builder = ctx->ac.builder; unsigned i, j, vgpr; - LLVMValueRef *addrs = abi->outputs; + LLVMValueRef *addrs = ctx->abi.outputs; LLVMValueRef color[8][4] = {}; LLVMValueRef depth = NULL, stencil = NULL, samplemask = NULL; diff --git a/src/gallium/drivers/radeonsi/si_shader_llvm_tess.c b/src/gallium/drivers/radeonsi/si_shader_llvm_tess.c index 32c5574cc2b..46e7ab62e42 100644 --- a/src/gallium/drivers/radeonsi/si_shader_llvm_tess.c +++ b/src/gallium/drivers/radeonsi/si_shader_llvm_tess.c @@ -832,9 +832,8 @@ static void si_write_tess_factors(struct si_shader_context *ctx, union si_shader } /* This only writes the tessellation factor levels. */ -void si_llvm_emit_tcs_epilogue(struct ac_shader_abi *abi) +void si_llvm_tcs_build_end(struct si_shader_context *ctx) { - struct si_shader_context *ctx = si_shader_context_from_abi(abi); LLVMBuilderRef builder = ctx->ac.builder; LLVMValueRef rel_patch_id, invocation_id, tf_lds_offset; @@ -947,9 +946,8 @@ static void si_set_ls_return_value_for_tcs(struct si_shader_context *ctx) ctx->return_value = ret; } -void si_llvm_emit_ls_epilogue(struct ac_shader_abi *abi) +void si_llvm_ls_build_end(struct si_shader_context *ctx) { - struct si_shader_context *ctx = si_shader_context_from_abi(abi); struct si_shader *shader = ctx->shader; struct si_shader_info *info = &shader->selector->info; unsigned i, chan; @@ -963,7 +961,7 @@ void si_llvm_emit_ls_epilogue(struct ac_shader_abi *abi) } LLVMValueRef vertex_dw_stride = get_tcs_in_vertex_dw_stride(ctx); LLVMValueRef base_dw_addr = LLVMBuildMul(ctx->ac.builder, vertex_id, vertex_dw_stride, ""); - LLVMValueRef *addrs = abi->outputs; + LLVMValueRef *addrs = ctx->abi.outputs; unsigned ret_offset = 8 + GFX9_TCS_NUM_USER_SGPR + 2; /* Write outputs to LDS. The next shader (TCS aka HS) will read diff --git a/src/gallium/drivers/radeonsi/si_shader_llvm_vs.c b/src/gallium/drivers/radeonsi/si_shader_llvm_vs.c index ca0a1a869b5..1e5a10c20f8 100644 --- a/src/gallium/drivers/radeonsi/si_shader_llvm_vs.c +++ b/src/gallium/drivers/radeonsi/si_shader_llvm_vs.c @@ -778,12 +778,11 @@ void si_llvm_build_vs_exports(struct si_shader_context *ctx, } } -void si_llvm_emit_vs_epilogue(struct ac_shader_abi *abi) +void si_llvm_vs_build_end(struct si_shader_context *ctx) { - struct si_shader_context *ctx = si_shader_context_from_abi(abi); struct si_shader_info *info = &ctx->shader->selector->info; struct si_shader_output_values *outputs = NULL; - LLVMValueRef *addrs = abi->outputs; + LLVMValueRef *addrs = ctx->abi.outputs; int i, j; assert(!ctx->shader->is_gs_copy_shader);