radeonsi: make get_thread_id_in_tg non-static

For future work.

Reviewed-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/14266>
2026-05-05 20:28:04 +02:00 · 2021-09-23 12:13:08 -04:00 · 2021-09-23 12:13:08 -04:00 · 7f6643ffd0
commit 7f6643ffd0
parent 6386b95f0f
2 changed files with 12 additions and 11 deletions
--- a/src/gallium/drivers/radeonsi/gfx10_shader_ngg.c
+++ b/src/gallium/drivers/radeonsi/gfx10_shader_ngg.c
@ -38,7 +38,7 @@ static LLVMValueRef get_tgsize(struct si_shader_context *ctx)
   return si_unpack_param(ctx, ctx->args.merged_wave_info, 28, 4);
 }

-static LLVMValueRef get_thread_id_in_tg(struct si_shader_context *ctx)
+LLVMValueRef gfx10_get_thread_id_in_tg(struct si_shader_context *ctx)
 {
   LLVMBuilderRef builder = ctx->ac.builder;
   LLVMValueRef tmp;
@ -277,7 +277,7 @@ static void build_streamout(struct si_shader_context *ctx, struct ngg_streamout
   struct pipe_stream_output_info *so = &ctx->shader->selector->so;
   LLVMBuilderRef builder = ctx->ac.builder;
   LLVMValueRef buf_ptr = ac_get_arg(&ctx->ac, ctx->internal_bindings);
-   LLVMValueRef tid = get_thread_id_in_tg(ctx);
+   LLVMValueRef tid = gfx10_get_thread_id_in_tg(ctx);
   LLVMValueRef tmp, tmp2;
   LLVMValueRef i32_2 = LLVMConstInt(ctx->ac.i32, 2, false);
   LLVMValueRef i32_4 = LLVMConstInt(ctx->ac.i32, 4, false);
@ -929,7 +929,7 @@ void gfx10_emit_ngg_culling_epilogue(struct ac_shader_abi *abi)
   assert(sel->info.stage == MESA_SHADER_VERTEX ||
          (sel->info.stage == MESA_SHADER_TESS_EVAL && !shader->key.ge.as_es));

-   LLVMValueRef es_vtxptr = ngg_nogs_vertex_ptr(ctx, get_thread_id_in_tg(ctx));
+   LLVMValueRef es_vtxptr = ngg_nogs_vertex_ptr(ctx, gfx10_get_thread_id_in_tg(ctx));
   LLVMValueRef packed_data = ctx->ac.i32_0;
   LLVMValueRef position[4] = {};
   unsigned pos_index = 0;
@ -1056,7 +1056,7 @@ void gfx10_emit_ngg_culling_epilogue(struct ac_shader_abi *abi)
   for (unsigned i = 0; i < num_vertices; i++)
      gs_vtxptr[i] = ngg_nogs_vertex_ptr(ctx, vtxindex[i]);

-   es_vtxptr = ngg_nogs_vertex_ptr(ctx, get_thread_id_in_tg(ctx));
+   es_vtxptr = ngg_nogs_vertex_ptr(ctx, gfx10_get_thread_id_in_tg(ctx));

   /* Adding these optimization barriers improves the generated code as follows. Crazy right?
    *
@ -1419,7 +1419,7 @@ void gfx10_emit_ngg_epilogue(struct ac_shader_abi *abi)
   LLVMValueRef vertex_ptr = NULL;

   if (sel->so.num_outputs || gfx10_ngg_writes_user_edgeflags(ctx->shader))
-      vertex_ptr = ngg_nogs_vertex_ptr(ctx, get_thread_id_in_tg(ctx));
+      vertex_ptr = ngg_nogs_vertex_ptr(ctx, gfx10_get_thread_id_in_tg(ctx));

   for (unsigned i = 0; i < info->num_outputs; i++) {
      outputs[i].semantic = info->output_semantic[i];
@ -1596,7 +1596,7 @@ void gfx10_emit_ngg_epilogue(struct ac_shader_abi *abi)
          */
         if (info->output_semantic[i] == VARYING_SLOT_POS &&
             ctx->shader->key.ge.opt.ngg_culling) {
-            vertex_ptr = ngg_nogs_vertex_ptr(ctx, get_thread_id_in_tg(ctx));
+            vertex_ptr = ngg_nogs_vertex_ptr(ctx, gfx10_get_thread_id_in_tg(ctx));

            for (unsigned j = 0; j < 4; j++) {
               tmp = LLVMConstInt(ctx->ac.i32, lds_pos_x + j, 0);
@ -1618,7 +1618,7 @@ void gfx10_emit_ngg_epilogue(struct ac_shader_abi *abi)
            /* Wait for GS stores to finish. */
            ac_build_s_barrier(&ctx->ac);

-            tmp = ngg_nogs_vertex_ptr(ctx, get_thread_id_in_tg(ctx));
+            tmp = ngg_nogs_vertex_ptr(ctx, gfx10_get_thread_id_in_tg(ctx));
            tmp = ac_build_gep0(&ctx->ac, tmp, ctx->ac.i32_0);
            outputs[i].values[0] = LLVMBuildLoad(builder, tmp, "");
         } else {
@ -1758,7 +1758,7 @@ void gfx10_ngg_gs_emit_vertex(struct si_shader_context *ctx, unsigned stream, LL

   ac_build_ifcc(&ctx->ac, can_emit, 9001);

-   const LLVMValueRef vertexptr = ngg_gs_emit_vertex_ptr(ctx, get_thread_id_in_tg(ctx), vertexidx);
+   const LLVMValueRef vertexptr = ngg_gs_emit_vertex_ptr(ctx, gfx10_get_thread_id_in_tg(ctx), vertexidx);
   unsigned out_idx = 0;
   for (unsigned i = 0; i < info->num_outputs; i++) {
      for (unsigned chan = 0; chan < 4; chan++, out_idx++) {
@ -1816,7 +1816,7 @@ void gfx10_ngg_gs_emit_prologue(struct si_shader_context *ctx)
    */
   LLVMBuilderRef builder = ctx->ac.builder;
   LLVMValueRef scratchptr = ctx->gs_ngg_scratch;
-   LLVMValueRef tid = get_thread_id_in_tg(ctx);
+   LLVMValueRef tid = gfx10_get_thread_id_in_tg(ctx);
   LLVMValueRef tmp;

   tmp = LLVMBuildICmp(builder, LLVMIntULT, tid, LLVMConstInt(ctx->ac.i32, 4, false), "");
@ -1850,7 +1850,7 @@ void gfx10_ngg_gs_emit_epilogue(struct si_shader_context *ctx)
      if (!info->num_stream_output_components[stream])
         continue;

-      const LLVMValueRef gsthread = get_thread_id_in_tg(ctx);
+      const LLVMValueRef gsthread = gfx10_get_thread_id_in_tg(ctx);

      ac_build_bgnloop(&ctx->ac, 5100);

@ -1893,7 +1893,7 @@ void gfx10_ngg_gs_emit_epilogue(struct si_shader_context *ctx)

   ac_build_s_barrier(&ctx->ac);

-   const LLVMValueRef tid = get_thread_id_in_tg(ctx);
+   const LLVMValueRef tid = gfx10_get_thread_id_in_tg(ctx);
   LLVMValueRef num_emit_threads = ngg_get_prim_cnt(ctx);

   /* Streamout */
--- a/src/gallium/drivers/radeonsi/si_shader_internal.h
+++ b/src/gallium/drivers/radeonsi/si_shader_internal.h
@ -185,6 +185,7 @@ void si_get_ps_epilog_key(struct si_shader *shader, union si_shader_part_key *ke
 void si_fix_resource_usage(struct si_screen *sscreen, struct si_shader *shader);

 /* gfx10_shader_ngg.c */
+LLVMValueRef gfx10_get_thread_id_in_tg(struct si_shader_context *ctx);
 bool gfx10_ngg_export_prim_early(struct si_shader *shader);
 void gfx10_ngg_build_sendmsg_gs_alloc_req(struct si_shader_context *ctx);
 void gfx10_ngg_build_export_prim(struct si_shader_context *ctx, LLVMValueRef user_edgeflags[3],