From a6675f35b20ac3a78a0d53d0851d59c464918fe0 Mon Sep 17 00:00:00 2001
From: Georg Lehmann <dadschoorse@gmail.com>
Date: Fri, 14 Mar 2025 09:06:02 +0100
Subject: [PATCH] aco: clamp exponent of 16bit ldexp

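The hardware's v_ldexp_f16 uses only a 16bit integer exponent, but the
exponent source of NIR's ldexp is 32bit. Clamp the exponent to the int16
range before passing it to the instruction.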

Cc: mesa-stable
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/34073>
---
 .../instruction_selection/aco_select_nir_alu.cpp | 16 +++++++++++++++-
 1 file changed, 15 insertions(+), 1 deletion(-)

diff --git a/src/amd/compiler/instruction_selection/aco_select_nir_alu.cpp b/src/amd/compiler/instruction_selection/aco_select_nir_alu.cpp
index 7c4f9da232c..ce20d2fd724 100644
--- a/src/amd/compiler/instruction_selection/aco_select_nir_alu.cpp
+++ b/src/amd/compiler/instruction_selection/aco_select_nir_alu.cpp
@@ -2379,7 +2379,21 @@ visit_alu_instr(isel_context* ctx, nir_alu_instr* instr)
    }
    case nir_op_ldexp: {
       if (dst.regClass() == v2b) {
-         emit_vop2_instruction(ctx, instr, aco_opcode::v_ldexp_f16, dst, false);
+         nir_scalar scalar = nir_get_scalar(&instr->def, 0);
+         scalar = nir_scalar_chase_alu_src(scalar, 1);
+
+         Temp exp;
+
+         /* Convert the exponent to 16bit int with saturation. */
+         if (nir_scalar_is_const(scalar)) {
+            int16_t clamped = MIN2(MAX2(nir_scalar_as_int(scalar), INT16_MIN), INT16_MAX);
+            exp = bld.copy(bld.def(v2b), Operand::c16(clamped));
+         } else {
+            exp = get_alu_src(ctx, instr->src[1]);
+            exp = bld.vop3(aco_opcode::v_cvt_pk_i16_i32, bld.def(v2b), exp, Operand::c32(0));
+         }
+
+         bld.vop2(aco_opcode::v_ldexp_f16, Definition(dst), get_alu_src(ctx, instr->src[0]), exp);
       } else if (dst.regClass() == v1) {
          emit_vop3a_instruction(ctx, instr, aco_opcode::v_ldexp_f32, dst);
       } else if (dst.regClass() == v2) {