ac/llvm: correctly load 16-bit TCS inputs from VGPRs and simplify

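The conversions to integer and the bitcasts are unnecessary because every value
is already an integer. 16-bit inputs are now extracted from the low or high half
of the 32-bit VGPR, as selected by the high_16bits I/O semantic.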

Acked-by: Pierre-Eric
Reviewed-by: Timur Kristóf <timur.kristof@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/40556>
This commit is contained in:
Marek Olšák 2026-03-19 22:52:50 -04:00 committed by Marge Bot
parent a48ffce4bd
commit 195eea461c
3 changed files with 24 additions and 23 deletions

View file

@ -2615,6 +2615,7 @@ static LLVMValueRef visit_load_input(struct ac_nir_context *ctx, nir_intrinsic_i
{
LLVMValueRef values[8];
LLVMTypeRef dest_type = get_def_type(ctx, &instr->def);
nir_io_semantics sem = nir_intrinsic_io_semantics(instr);
unsigned base = nir_intrinsic_base(instr);
unsigned component = nir_intrinsic_component(instr);
unsigned count = instr->def.num_components;
@ -2626,17 +2627,9 @@ static LLVMValueRef visit_load_input(struct ac_nir_context *ctx, nir_intrinsic_i
/* This is used to load TCS inputs from VGPRs in radeonsi. */
if (ctx->stage == MESA_SHADER_TESS_CTRL) {
LLVMTypeRef component_type = LLVMGetTypeKind(dest_type) == LLVMVectorTypeKind ?
LLVMGetElementType(dest_type) : dest_type;
LLVMValueRef result = ctx->abi->load_tess_varyings(ctx->abi, component_type,
nir_intrinsic_io_semantics(instr).location,
component, count);
if (instr->def.bit_size == 16) {
result = ac_to_integer(&ctx->ac, result);
result = LLVMBuildTrunc(ctx->ac.builder, result, dest_type, "");
}
return LLVMBuildBitCast(ctx->ac.builder, result, dest_type, "");
return ctx->abi->load_tess_varyings(ctx->abi, instr->def.num_components,
instr->def.bit_size, sem.location,
component, sem.high_16bits);
}
assert(ctx->stage == MESA_SHADER_FRAGMENT);

View file

@ -25,9 +25,10 @@ struct ac_shader_abi {
LLVMValueRef outputs[AC_LLVM_MAX_OUTPUTS * 4];
bool is_16bit[AC_LLVM_MAX_OUTPUTS * 4];
LLVMValueRef (*load_tess_varyings)(struct ac_shader_abi *abi, LLVMTypeRef type,
gl_varying_slot slot, unsigned component,
unsigned num_components);
/* The result must be a scalar or a vector whose element type is i16 or i32. */
LLVMValueRef (*load_tess_varyings)(struct ac_shader_abi *abi, unsigned num_components,
unsigned bit_size, gl_varying_slot slot, unsigned component,
bool high_16bits);
LLVMValueRef (*load_ubo)(struct ac_shader_abi *abi, LLVMValueRef index);

View file

@ -10,25 +10,32 @@
#include "sid.h"
#include "nir.h"
static LLVMValueRef si_nir_load_tcs_varyings(struct ac_shader_abi *abi, LLVMTypeRef type,
gl_varying_slot slot, unsigned component,
unsigned num_components)
static LLVMValueRef si_nir_load_tcs_varyings(struct ac_shader_abi *abi, unsigned num_components,
unsigned bit_size, gl_varying_slot slot,
unsigned component, bool high_16bits)
{
struct si_shader_context *ctx = si_shader_context_from_abi(abi);
assert(ctx->shader->key.ge.opt.same_patch_vertices);
assert(bit_size == 16 || bit_size == 32);
/* Load the TCS input from a VGPR. */
unsigned func_param = ctx->args->ac.tcs_rel_ids.arg_index + 1 +
si_shader_io_get_unique_index(slot) * 4;
si_shader_io_get_unique_index(slot) * 4;
LLVMValueRef *value = alloca(sizeof(LLVMValueRef) * num_components);
LLVMValueRef value[4];
for (unsigned i = component; i < component + num_components; i++) {
value[i] = LLVMGetParam(ctx->main_fn.value, func_param + i);
value[i] = LLVMBuildBitCast(ctx->ac.builder, value[i], type, "");
for (unsigned i = 0; i < num_components; i++) {
value[i] = LLVMGetParam(ctx->main_fn.value, func_param + component + i);
if (bit_size == 16) {
/* Extract low or high 16 bits from the value. */
value[i] = LLVMBuildBitCast(ctx->ac.builder, value[i], ctx->ac.v2i16, "");
value[i] = LLVMBuildExtractElement(ctx->ac.builder, value[i],
LLVMConstInt(ctx->ac.i32, high_16bits, 0), "");
}
}
return ac_build_gather_values(&ctx->ac, value + component, num_components);
return ac_build_gather_values(&ctx->ac, value, num_components);
}
void si_llvm_ls_build_end(struct si_shader_context *ctx, const nir_shader *nir)