From a6675f35b20ac3a78a0d53d0851d59c464918fe0 Mon Sep 17 00:00:00 2001
From: Georg Lehmann
Date: Fri, 14 Mar 2025 09:06:02 +0100
Subject: [PATCH] aco: clamp exponent of 16bit ldexp

The hw uses only a 16bit int, but NIR's src is 32bit.

Cc: mesa-stable
Part-of:
---
 .../instruction_selection/aco_select_nir_alu.cpp | 16 +++++++++++++++-
 1 file changed, 15 insertions(+), 1 deletion(-)

diff --git a/src/amd/compiler/instruction_selection/aco_select_nir_alu.cpp b/src/amd/compiler/instruction_selection/aco_select_nir_alu.cpp
index 7c4f9da232c..ce20d2fd724 100644
--- a/src/amd/compiler/instruction_selection/aco_select_nir_alu.cpp
+++ b/src/amd/compiler/instruction_selection/aco_select_nir_alu.cpp
@@ -2379,7 +2379,21 @@ visit_alu_instr(isel_context* ctx, nir_alu_instr* instr)
    }
    case nir_op_ldexp: {
       if (dst.regClass() == v2b) {
-         emit_vop2_instruction(ctx, instr, aco_opcode::v_ldexp_f16, dst, false);
+         nir_scalar scalar = nir_get_scalar(&instr->def, 0);
+         scalar = nir_scalar_chase_alu_src(scalar, 1);
+
+         Temp exp;
+
+         /* The hw exponent is a 16bit int, NIR's is 32bit: convert it with saturation. */
+         if (nir_scalar_is_const(scalar)) {
+            int16_t clamped = MIN2(MAX2(nir_scalar_as_int(scalar), INT16_MIN), INT16_MAX);
+            exp = bld.copy(bld.def(v2b), Operand::c16(clamped));
+         } else {
+            exp = get_alu_src(ctx, instr->src[1]);
+            exp = bld.vop3(aco_opcode::v_cvt_pk_i16_i32, bld.def(v2b), exp, Operand::c32(0));
+         }
+
+         bld.vop2(aco_opcode::v_ldexp_f16, Definition(dst), get_alu_src(ctx, instr->src[0]), exp);
       } else if (dst.regClass() == v1) {
          emit_vop3a_instruction(ctx, instr, aco_opcode::v_ldexp_f32, dst);
       } else if (dst.regClass() == v2) {