From 08a47fa05cf9b808ebf2dfe390a9e9070eeb5b9e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?=
Date: Sun, 29 Dec 2024 14:26:12 -0500
Subject: [PATCH] ac/llvm: lower vector load_const in NIR
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Reviewed-by: Timur Kristóf
Part-of:
---
 src/amd/llvm/ac_nir_to_llvm.c            | 40 ++++--------------------
 src/gallium/drivers/radeonsi/si_shader.c | 11 ++-----
 2 files changed, 9 insertions(+), 42 deletions(-)

diff --git a/src/amd/llvm/ac_nir_to_llvm.c b/src/amd/llvm/ac_nir_to_llvm.c
index e210d3c4dc5..d3940ffea27 100644
--- a/src/amd/llvm/ac_nir_to_llvm.c
+++ b/src/amd/llvm/ac_nir_to_llvm.c
@@ -1269,40 +1269,13 @@ static bool visit_alu(struct ac_nir_context *ctx, const nir_alu_instr *instr)
    return true;
 }
 
-static bool visit_load_const(struct ac_nir_context *ctx, const nir_load_const_instr *instr)
+static void visit_load_const(struct ac_nir_context *ctx, const nir_load_const_instr *instr)
 {
-   LLVMValueRef values[16], value = NULL;
-   LLVMTypeRef element_type = LLVMIntTypeInContext(ctx->ac.context, instr->def.bit_size);
+   assert(instr->def.num_components == 1);
 
-   for (unsigned i = 0; i < instr->def.num_components; ++i) {
-      switch (instr->def.bit_size) {
-      case 1:
-         values[i] = LLVMConstInt(element_type, instr->value[i].b, false);
-         break;
-      case 8:
-         values[i] = LLVMConstInt(element_type, instr->value[i].u8, false);
-         break;
-      case 16:
-         values[i] = LLVMConstInt(element_type, instr->value[i].u16, false);
-         break;
-      case 32:
-         values[i] = LLVMConstInt(element_type, instr->value[i].u32, false);
-         break;
-      case 64:
-         values[i] = LLVMConstInt(element_type, instr->value[i].u64, false);
-         break;
-      default:
-         fprintf(stderr, "unsupported nir load_const bit_size: %d\n", instr->def.bit_size);
-         return false;
-      }
-   }
-   if (instr->def.num_components > 1) {
-      value = LLVMConstVector(values, instr->def.num_components);
-   } else
-      value = values[0];
-
-   ctx->ssa_defs[instr->def.index] = value;
-   return true;
+   ctx->ssa_defs[instr->def.index] =
+      LLVMConstInt(LLVMIntTypeInContext(ctx->ac.context, instr->def.bit_size),
+                   nir_const_value_as_uint(instr->value[0], instr->def.bit_size), false);
 }
 
 /* Gather4 should follow the same rules as bilinear filtering, but the hardware
@@ -3868,8 +3841,7 @@ static bool visit_block(struct ac_nir_context *ctx, nir_block *block)
             return false;
          break;
       case nir_instr_type_load_const:
-         if (!visit_load_const(ctx, nir_instr_as_load_const(instr)))
-            return false;
+         visit_load_const(ctx, nir_instr_as_load_const(instr));
          break;
       case nir_instr_type_intrinsic:
          if (!visit_intrinsic(ctx, nir_instr_as_intrinsic(instr)))
diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
index b467df0fb5c..1761ae8ec20 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -2694,11 +2694,8 @@ static struct nir_shader *si_get_nir_shader(struct si_shader *shader, struct si_
     */
    NIR_PASS(progress, nir, nir_opt_move, nir_move_comparisons);
 
-   /* aco only accept scalar const, must be done after si_nir_late_opts()
-    * which may generate vec const.
-    */
-   if (nir->info.use_aco_amd)
-      NIR_PASS_V(nir, nir_lower_load_const_to_scalar);
+   /* This must be done after si_nir_late_opts() because it may generate vec const. */
+   NIR_PASS(_, nir, nir_lower_load_const_to_scalar);
 
    /* This helps LLVM form VMEM clauses and thus get more GPU cache hits.
     * 200 is tuned for Viewperf. It should be done last.
@@ -2802,9 +2799,7 @@ si_nir_generate_gs_copy_shader(struct si_screen *sscreen,
 
    si_nir_opts(gs_selector->screen, nir, false);
 
-   /* aco only accept scalar const */
-   if (gs_nir->info.use_aco_amd)
-      NIR_PASS_V(nir, nir_lower_load_const_to_scalar);
+   NIR_PASS_V(nir, nir_lower_load_const_to_scalar);
 
    if (si_can_dump_shader(sscreen, MESA_SHADER_GEOMETRY, SI_DUMP_NIR)) {
       fprintf(stderr, "GS Copy Shader:\n");