ac/llvm: correctly load 16-bit TCS inputs from VGPRs and simplify

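The conversions to integer and bitcasts are unnecessary because all values are already integers. 16-bit inputs are now extracted from the low or high half of the 32-bit VGPR, selected by the high_16bits I/O semantic, instead of being truncated in the caller.

Acked-by: Pierre-Eric
Reviewed-by: Timur Kristóf <timur.kristof@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/40556>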
2026-05-07 07:08:04 +02:00 · 2026-03-19 22:52:50 -04:00 · 2026-03-19 22:52:50 -04:00 · 195eea461c
commit 195eea461c
parent a48ffce4bd
3 changed files with 24 additions and 23 deletions
--- a/src/amd/llvm/ac_nir_to_llvm.c
+++ b/src/amd/llvm/ac_nir_to_llvm.c
@ -2615,6 +2615,7 @@ static LLVMValueRef visit_load_input(struct ac_nir_context *ctx, nir_intrinsic_i
 {
   LLVMValueRef values[8];
   LLVMTypeRef dest_type = get_def_type(ctx, &instr->def);
+   nir_io_semantics sem = nir_intrinsic_io_semantics(instr);
   unsigned base = nir_intrinsic_base(instr);
   unsigned component = nir_intrinsic_component(instr);
   unsigned count = instr->def.num_components;
@ -2626,17 +2627,9 @@ static LLVMValueRef visit_load_input(struct ac_nir_context *ctx, nir_intrinsic_i

   /* This is used to load TCS inputs from VGPRs in radeonsi. */
   if (ctx->stage == MESA_SHADER_TESS_CTRL) {
-      LLVMTypeRef component_type = LLVMGetTypeKind(dest_type) == LLVMVectorTypeKind ?
-                                      LLVMGetElementType(dest_type) : dest_type;
-
-      LLVMValueRef result = ctx->abi->load_tess_varyings(ctx->abi, component_type,
-                                                         nir_intrinsic_io_semantics(instr).location,
-                                                         component, count);
-      if (instr->def.bit_size == 16) {
-         result = ac_to_integer(&ctx->ac, result);
-         result = LLVMBuildTrunc(ctx->ac.builder, result, dest_type, "");
-      }
-      return LLVMBuildBitCast(ctx->ac.builder, result, dest_type, "");
+      return ctx->abi->load_tess_varyings(ctx->abi, instr->def.num_components,
+                                          instr->def.bit_size, sem.location,
+                                          component, sem.high_16bits);
   }

   assert(ctx->stage == MESA_SHADER_FRAGMENT);
--- a/src/amd/llvm/ac_shader_abi.h
+++ b/src/amd/llvm/ac_shader_abi.h
@ -25,9 +25,10 @@ struct ac_shader_abi {
   LLVMValueRef outputs[AC_LLVM_MAX_OUTPUTS * 4];
   bool is_16bit[AC_LLVM_MAX_OUTPUTS * 4];

-   LLVMValueRef (*load_tess_varyings)(struct ac_shader_abi *abi, LLVMTypeRef type,
-                                      gl_varying_slot slot, unsigned component,
-                                      unsigned num_components);
+   /* The result must be either scalar or vector i16 or i32. */
+   LLVMValueRef (*load_tess_varyings)(struct ac_shader_abi *abi, unsigned num_components,
+                                      unsigned bit_size, gl_varying_slot slot, unsigned component,
+                                      bool high_16bits);

   LLVMValueRef (*load_ubo)(struct ac_shader_abi *abi, LLVMValueRef index);

--- a/src/gallium/drivers/radeonsi/si_shader_llvm_tess.c
+++ b/src/gallium/drivers/radeonsi/si_shader_llvm_tess.c
@ -10,25 +10,32 @@
 #include "sid.h"
 #include "nir.h"

-static LLVMValueRef si_nir_load_tcs_varyings(struct ac_shader_abi *abi, LLVMTypeRef type,
-                                             gl_varying_slot slot, unsigned component,
-                                             unsigned num_components)
+static LLVMValueRef si_nir_load_tcs_varyings(struct ac_shader_abi *abi, unsigned num_components,
+                                             unsigned bit_size, gl_varying_slot slot,
+                                             unsigned component, bool high_16bits)
 {
   struct si_shader_context *ctx = si_shader_context_from_abi(abi);

   assert(ctx->shader->key.ge.opt.same_patch_vertices);
+   assert(bit_size == 16 || bit_size == 32);

   /* Load the TCS input from a VGPR. */
   unsigned func_param = ctx->args->ac.tcs_rel_ids.arg_index + 1 +
-      si_shader_io_get_unique_index(slot) * 4;
+                         si_shader_io_get_unique_index(slot) * 4;
+   LLVMValueRef *value = alloca(sizeof(LLVMValueRef) * num_components);

-   LLVMValueRef value[4];
-   for (unsigned i = component; i < component + num_components; i++) {
-      value[i] = LLVMGetParam(ctx->main_fn.value, func_param + i);
-      value[i] = LLVMBuildBitCast(ctx->ac.builder, value[i], type, "");
+   for (unsigned i = 0; i < num_components; i++) {
+      value[i] = LLVMGetParam(ctx->main_fn.value, func_param + component + i);
+
+      if (bit_size == 16) {
+         /* Extract low or high 16 bits from the value. */
+         value[i] = LLVMBuildBitCast(ctx->ac.builder, value[i], ctx->ac.v2i16, "");
+         value[i] = LLVMBuildExtractElement(ctx->ac.builder, value[i],
+                                            LLVMConstInt(ctx->ac.i32, high_16bits, 0), "");
+      }
   }

-   return ac_build_gather_values(&ctx->ac, value + component, num_components);
+   return ac_build_gather_values(&ctx->ac, value, num_components);
 }

 void si_llvm_ls_build_end(struct si_shader_context *ctx, const nir_shader *nir)