From 250f4235464d9938f18d8c70d5ae3f2c10d15f32 Mon Sep 17 00:00:00 2001
From: Georg Lehmann
Date: Fri, 14 Mar 2025 09:06:02 +0100
Subject: [PATCH] aco: clamp exponent of 16bit ldexp

The hw uses only a 16bit int, but NIR's src is 32bit.

Cc: mesa-stable
Part-of:
(cherry picked from commit a6675f35b20ac3a78a0d53d0851d59c464918fe0)
---
 .pick_status.json                              |  2 +-
 src/amd/compiler/aco_instruction_selection.cpp | 16 +++++++++++++++-
 2 files changed, 16 insertions(+), 2 deletions(-)

diff --git a/.pick_status.json b/.pick_status.json
index 9738ae1902d..29e8f25dc0d 100644
--- a/.pick_status.json
+++ b/.pick_status.json
@@ -784,7 +784,7 @@
         "description": "aco: clamp exponent of 16bit ldexp",
         "nominated": true,
         "nomination_type": 1,
-        "resolution": 0,
+        "resolution": 1,
         "main_sha": null,
         "because_sha": null,
         "notes": null
diff --git a/src/amd/compiler/aco_instruction_selection.cpp b/src/amd/compiler/aco_instruction_selection.cpp
index 27a0fda6036..bb2645f5ff5 100644
--- a/src/amd/compiler/aco_instruction_selection.cpp
+++ b/src/amd/compiler/aco_instruction_selection.cpp
@@ -2823,7 +2823,21 @@ visit_alu_instr(isel_context* ctx, nir_alu_instr* instr)
    }
    case nir_op_ldexp: {
       if (dst.regClass() == v2b) {
-         emit_vop2_instruction(ctx, instr, aco_opcode::v_ldexp_f16, dst, false);
+         nir_scalar scalar = nir_get_scalar(&instr->def, 0);
+         scalar = nir_scalar_chase_alu_src(scalar, 1);
+
+         Temp exp;
+
+         /* Convert the exponent to 16bit int with saturation. */
+         if (nir_scalar_is_const(scalar)) {
+            int16_t clamped = MIN2(MAX2(nir_scalar_as_int(scalar), INT16_MIN), INT16_MAX);
+            exp = bld.copy(bld.def(v2b), Operand::c16(clamped));
+         } else {
+            exp = get_alu_src(ctx, instr->src[1]);
+            exp = bld.vop3(aco_opcode::v_cvt_pk_i16_i32, bld.def(v2b), exp, Operand::c32(0));
+         }
+
+         bld.vop2(aco_opcode::v_ldexp_f16, Definition(dst), get_alu_src(ctx, instr->src[0]), exp);
       } else if (dst.regClass() == v1) {
          emit_vop3a_instruction(ctx, instr, aco_opcode::v_ldexp_f32, dst);
       } else if (dst.regClass() == v2) {