radeonsi: make get_thread_id_in_tg non-static

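Rename it to gfx10_get_thread_id_in_tg and declare it in the shared header so that it can be called from other files in future work.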

Reviewed-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/14266>
This commit is contained in:
Marek Olšák 2021-09-23 12:13:08 -04:00 committed by Marge Bot
parent 6386b95f0f
commit 7f6643ffd0
2 changed files with 12 additions and 11 deletions

View file

@ -38,7 +38,7 @@ static LLVMValueRef get_tgsize(struct si_shader_context *ctx)
return si_unpack_param(ctx, ctx->args.merged_wave_info, 28, 4);
}
static LLVMValueRef get_thread_id_in_tg(struct si_shader_context *ctx)
LLVMValueRef gfx10_get_thread_id_in_tg(struct si_shader_context *ctx)
{
LLVMBuilderRef builder = ctx->ac.builder;
LLVMValueRef tmp;
@ -277,7 +277,7 @@ static void build_streamout(struct si_shader_context *ctx, struct ngg_streamout
struct pipe_stream_output_info *so = &ctx->shader->selector->so;
LLVMBuilderRef builder = ctx->ac.builder;
LLVMValueRef buf_ptr = ac_get_arg(&ctx->ac, ctx->internal_bindings);
LLVMValueRef tid = get_thread_id_in_tg(ctx);
LLVMValueRef tid = gfx10_get_thread_id_in_tg(ctx);
LLVMValueRef tmp, tmp2;
LLVMValueRef i32_2 = LLVMConstInt(ctx->ac.i32, 2, false);
LLVMValueRef i32_4 = LLVMConstInt(ctx->ac.i32, 4, false);
@ -929,7 +929,7 @@ void gfx10_emit_ngg_culling_epilogue(struct ac_shader_abi *abi)
assert(sel->info.stage == MESA_SHADER_VERTEX ||
(sel->info.stage == MESA_SHADER_TESS_EVAL && !shader->key.ge.as_es));
LLVMValueRef es_vtxptr = ngg_nogs_vertex_ptr(ctx, get_thread_id_in_tg(ctx));
LLVMValueRef es_vtxptr = ngg_nogs_vertex_ptr(ctx, gfx10_get_thread_id_in_tg(ctx));
LLVMValueRef packed_data = ctx->ac.i32_0;
LLVMValueRef position[4] = {};
unsigned pos_index = 0;
@ -1056,7 +1056,7 @@ void gfx10_emit_ngg_culling_epilogue(struct ac_shader_abi *abi)
for (unsigned i = 0; i < num_vertices; i++)
gs_vtxptr[i] = ngg_nogs_vertex_ptr(ctx, vtxindex[i]);
es_vtxptr = ngg_nogs_vertex_ptr(ctx, get_thread_id_in_tg(ctx));
es_vtxptr = ngg_nogs_vertex_ptr(ctx, gfx10_get_thread_id_in_tg(ctx));
/* Adding these optimization barriers improves the generated code as follows. Crazy right?
*
@ -1419,7 +1419,7 @@ void gfx10_emit_ngg_epilogue(struct ac_shader_abi *abi)
LLVMValueRef vertex_ptr = NULL;
if (sel->so.num_outputs || gfx10_ngg_writes_user_edgeflags(ctx->shader))
vertex_ptr = ngg_nogs_vertex_ptr(ctx, get_thread_id_in_tg(ctx));
vertex_ptr = ngg_nogs_vertex_ptr(ctx, gfx10_get_thread_id_in_tg(ctx));
for (unsigned i = 0; i < info->num_outputs; i++) {
outputs[i].semantic = info->output_semantic[i];
@ -1596,7 +1596,7 @@ void gfx10_emit_ngg_epilogue(struct ac_shader_abi *abi)
*/
if (info->output_semantic[i] == VARYING_SLOT_POS &&
ctx->shader->key.ge.opt.ngg_culling) {
vertex_ptr = ngg_nogs_vertex_ptr(ctx, get_thread_id_in_tg(ctx));
vertex_ptr = ngg_nogs_vertex_ptr(ctx, gfx10_get_thread_id_in_tg(ctx));
for (unsigned j = 0; j < 4; j++) {
tmp = LLVMConstInt(ctx->ac.i32, lds_pos_x + j, 0);
@ -1618,7 +1618,7 @@ void gfx10_emit_ngg_epilogue(struct ac_shader_abi *abi)
/* Wait for GS stores to finish. */
ac_build_s_barrier(&ctx->ac);
tmp = ngg_nogs_vertex_ptr(ctx, get_thread_id_in_tg(ctx));
tmp = ngg_nogs_vertex_ptr(ctx, gfx10_get_thread_id_in_tg(ctx));
tmp = ac_build_gep0(&ctx->ac, tmp, ctx->ac.i32_0);
outputs[i].values[0] = LLVMBuildLoad(builder, tmp, "");
} else {
@ -1758,7 +1758,7 @@ void gfx10_ngg_gs_emit_vertex(struct si_shader_context *ctx, unsigned stream, LL
ac_build_ifcc(&ctx->ac, can_emit, 9001);
const LLVMValueRef vertexptr = ngg_gs_emit_vertex_ptr(ctx, get_thread_id_in_tg(ctx), vertexidx);
const LLVMValueRef vertexptr = ngg_gs_emit_vertex_ptr(ctx, gfx10_get_thread_id_in_tg(ctx), vertexidx);
unsigned out_idx = 0;
for (unsigned i = 0; i < info->num_outputs; i++) {
for (unsigned chan = 0; chan < 4; chan++, out_idx++) {
@ -1816,7 +1816,7 @@ void gfx10_ngg_gs_emit_prologue(struct si_shader_context *ctx)
*/
LLVMBuilderRef builder = ctx->ac.builder;
LLVMValueRef scratchptr = ctx->gs_ngg_scratch;
LLVMValueRef tid = get_thread_id_in_tg(ctx);
LLVMValueRef tid = gfx10_get_thread_id_in_tg(ctx);
LLVMValueRef tmp;
tmp = LLVMBuildICmp(builder, LLVMIntULT, tid, LLVMConstInt(ctx->ac.i32, 4, false), "");
@ -1850,7 +1850,7 @@ void gfx10_ngg_gs_emit_epilogue(struct si_shader_context *ctx)
if (!info->num_stream_output_components[stream])
continue;
const LLVMValueRef gsthread = get_thread_id_in_tg(ctx);
const LLVMValueRef gsthread = gfx10_get_thread_id_in_tg(ctx);
ac_build_bgnloop(&ctx->ac, 5100);
@ -1893,7 +1893,7 @@ void gfx10_ngg_gs_emit_epilogue(struct si_shader_context *ctx)
ac_build_s_barrier(&ctx->ac);
const LLVMValueRef tid = get_thread_id_in_tg(ctx);
const LLVMValueRef tid = gfx10_get_thread_id_in_tg(ctx);
LLVMValueRef num_emit_threads = ngg_get_prim_cnt(ctx);
/* Streamout */

View file

@ -185,6 +185,7 @@ void si_get_ps_epilog_key(struct si_shader *shader, union si_shader_part_key *ke
void si_fix_resource_usage(struct si_screen *sscreen, struct si_shader *shader);
/* gfx10_shader_ngg.c */
LLVMValueRef gfx10_get_thread_id_in_tg(struct si_shader_context *ctx);
bool gfx10_ngg_export_prim_early(struct si_shader *shader);
void gfx10_ngg_build_sendmsg_gs_alloc_req(struct si_shader_context *ctx);
void gfx10_ngg_build_export_prim(struct si_shader_context *ctx, LLVMValueRef user_edgeflags[3],