ac/llvm: remove handling of input and output loads/stores that are lowered

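Only the handling of input/output loads and stores that are already lowered is removed. There is still a lot of the remaining I/O handling that we use, so it stays.

Acked-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/28607>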
2026-05-08 04:48:08 +02:00 · 2024-03-30 23:21:16 -04:00 · 2024-03-30 23:21:16 -04:00 · 105e22f6fd
commit 105e22f6fd
parent ce7ca0d80b
4 changed files with 19 additions and 76 deletions
--- a/src/amd/llvm/ac_nir_to_llvm.c
+++ b/src/amd/llvm/ac_nir_to_llvm.c
@@ -2125,23 +2125,16 @@ static void visit_store_output(struct ac_nir_context *ctx, nir_intrinsic_instr *
   unsigned writemask = nir_intrinsic_write_mask(instr);
   unsigned component = nir_intrinsic_component(instr);
   LLVMValueRef src = ac_to_float(&ctx->ac, get_src(ctx, instr->src[0]));
+   ASSERTED unsigned bit_size = ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src));
   ASSERTED nir_src offset = *nir_get_io_offset_src(instr);

+   /* Non-monolithic PS and also LS before TCS in radeonsi use this to forward outputs to
+    * registers.
+    */
+   assert(bit_size == 16 || bit_size == 32);
   /* No indirect indexing is allowed here. */
   assert(nir_src_is_const(offset) && nir_src_as_uint(offset) == 0);

-   switch (ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src))) {
-   case 16:
-   case 32:
-      break;
-   case 64:
-      unreachable("64-bit IO should have been lowered to 32 bits");
-      return;
-   default:
-      unreachable("unhandled store_output bit size");
-      return;
-   }
-
   writemask <<= component;

   for (unsigned chan = 0; chan < 8; chan++) {
@@ -2885,50 +2878,26 @@ static LLVMValueRef load_interpolated_input(struct ac_nir_context *ctx, LLVMValu
   return ac_to_integer(&ctx->ac, ac_build_gather_values(&ctx->ac, values, num_components));
 }

-static LLVMValueRef visit_load(struct ac_nir_context *ctx, nir_intrinsic_instr *instr,
-                               bool is_output)
+static LLVMValueRef visit_load(struct ac_nir_context *ctx, nir_intrinsic_instr *instr)
 {
   LLVMValueRef values[8];
   LLVMTypeRef dest_type = get_def_type(ctx, &instr->def);
-   LLVMTypeRef component_type;
   unsigned base = nir_intrinsic_base(instr);
   unsigned component = nir_intrinsic_component(instr);
   unsigned count = instr->def.num_components;
-   nir_src *vertex_index_src = nir_get_io_arrayed_index_src(instr);
-   LLVMValueRef vertex_index = vertex_index_src ? get_src(ctx, *vertex_index_src) : NULL;
   nir_src offset = *nir_get_io_offset_src(instr);
-   LLVMValueRef indir_index = NULL;

-   switch (instr->def.bit_size) {
-   case 16:
-   case 32:
-      break;
-   case 64:
-      if (ctx->stage != MESA_SHADER_VERTEX || is_output) {
-         unreachable("64-bit IO should have been lowered");
-         return NULL;
-      }
-      break;
-   default:
-      unreachable("unhandled load type");
-      return NULL;
-   }
-
-   if (LLVMGetTypeKind(dest_type) == LLVMVectorTypeKind)
-      component_type = LLVMGetElementType(dest_type);
-   else
-      component_type = dest_type;
-
-   if (nir_src_is_const(offset))
-      assert(nir_src_as_uint(offset) == 0);
-   else
-      indir_index = get_src(ctx, offset);
+   assert(instr->def.bit_size == 16 || instr->def.bit_size == 32);
+   /* No indirect indexing allowed. */
+   assert(nir_src_is_const(offset) && nir_src_as_uint(offset) == 0);

+   /* This is used to load TCS inputs from VGPRs in radeonsi. */
   if (ctx->stage == MESA_SHADER_TESS_CTRL) {
+      LLVMTypeRef component_type = LLVMGetTypeKind(dest_type) == LLVMVectorTypeKind ?
+                                      LLVMGetElementType(dest_type) : dest_type;
+
      LLVMValueRef result = ctx->abi->load_tess_varyings(ctx->abi, component_type,
-                                                         vertex_index, indir_index,
-                                                         base, component,
-                                                         count, !is_output);
+                                                         base, component, count);
      if (instr->def.bit_size == 16) {
         result = ac_to_integer(&ctx->ac, result);
         result = LLVMBuildTrunc(ctx->ac.builder, result, dest_type, "");
@@ -2936,22 +2905,6 @@ static LLVMValueRef visit_load(struct ac_nir_context *ctx, nir_intrinsic_instr *
      return LLVMBuildBitCast(ctx->ac.builder, result, dest_type, "");
   }

-   /* No indirect indexing is allowed after this point. */
-   assert(!indir_index);
-
-   /* Other non-fragment cases have outputs in temporaries. */
-   if (is_output && (ctx->stage == MESA_SHADER_VERTEX || ctx->stage == MESA_SHADER_TESS_EVAL)) {
-      assert(is_output);
-
-      for (unsigned chan = component; chan < count + component; chan++)
-         values[chan] = LLVMBuildLoad2(ctx->ac.builder, ctx->ac.f32,
-                                       ctx->abi->outputs[base * 4 + chan], "");
-
-      LLVMValueRef result = ac_build_varying_gather_values(&ctx->ac, values, count, component);
-      return LLVMBuildBitCast(ctx->ac.builder, result, dest_type, "");
-   }
-
-   /* Fragment shader inputs. */
   assert(ctx->stage == MESA_SHADER_FRAGMENT);
   unsigned vertex_id = 0; /* P0 */

@@ -3203,14 +3156,9 @@ static bool visit_intrinsic(struct ac_nir_context *ctx, nir_intrinsic_instr *ins
   case nir_intrinsic_load_input:
   case nir_intrinsic_load_input_vertex:
   case nir_intrinsic_load_per_vertex_input:
-      result = visit_load(ctx, instr, false);
-      break;
-   case nir_intrinsic_load_output:
-   case nir_intrinsic_load_per_vertex_output:
-      result = visit_load(ctx, instr, true);
+      result = visit_load(ctx, instr);
      break;
   case nir_intrinsic_store_output:
-   case nir_intrinsic_store_per_vertex_output:
      visit_store_output(ctx, instr);
      break;
   case nir_intrinsic_load_shared:
--- a/src/amd/llvm/ac_shader_abi.h
+++ b/src/amd/llvm/ac_shader_abi.h
@@ -42,9 +42,8 @@ struct ac_shader_abi {
   unsigned fs_input_attr_indices[MAX_VARYING];

   LLVMValueRef (*load_tess_varyings)(struct ac_shader_abi *abi, LLVMTypeRef type,
-                                      LLVMValueRef vertex_index, LLVMValueRef param_index,
                                      unsigned driver_location, unsigned component,
-                                      unsigned num_components, bool load_inputs);
+                                      unsigned num_components);

   LLVMValueRef (*load_ubo)(struct ac_shader_abi *abi, LLVMValueRef index);

--- a/src/gallium/drivers/radeonsi/si_shader_llvm.c
+++ b/src/gallium/drivers/radeonsi/si_shader_llvm.c
@@ -731,12 +731,9 @@ static bool si_llvm_translate_nir(struct si_shader_context *ctx, struct si_shade
      ctx->stage == MESA_SHADER_VERTEX && shader->key.ge.as_ls &&
      shader->key.ge.opt.same_patch_vertices;

-   bool tcs_need_output =
-      ctx->stage == MESA_SHADER_TESS_CTRL && info->tessfactors_are_def_in_all_invocs;
-
   bool ps_need_output = ctx->stage == MESA_SHADER_FRAGMENT;

-   if (ls_need_output || tcs_need_output || ps_need_output) {
+   if (ls_need_output || ps_need_output) {
      for (unsigned i = 0; i < info->num_outputs; i++) {
         LLVMTypeRef type = ctx->ac.f32;

--- a/src/gallium/drivers/radeonsi/si_shader_llvm_tess.c
+++ b/src/gallium/drivers/radeonsi/si_shader_llvm_tess.c
@@ -10,14 +10,13 @@
 #include "sid.h"

 static LLVMValueRef si_nir_load_tcs_varyings(struct ac_shader_abi *abi, LLVMTypeRef type,
-                                             LLVMValueRef vertex_index, LLVMValueRef param_index,
                                             unsigned driver_location, unsigned component,
-                                             unsigned num_components, bool load_input)
+                                             unsigned num_components)
 {
   struct si_shader_context *ctx = si_shader_context_from_abi(abi);
   struct si_shader_info *info = &ctx->shader->selector->info;

-   assert(ctx->shader->key.ge.opt.same_patch_vertices && !param_index);
+   assert(ctx->shader->key.ge.opt.same_patch_vertices);

   uint8_t semantic = info->input[driver_location].semantic;
   /* Load the TCS input from a VGPR. */