aco: clamp exponent of 16bit ldexp

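The hardware's v_ldexp_f16 uses only a 16-bit integer exponent, but NIR's ldexp exponent source is 32-bit, so convert the exponent to a 16-bit int with saturation.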

Cc: mesa-stable
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/34073>
(cherry picked from commit a6675f35b2)
This commit is contained in:
Georg Lehmann 2025-03-14 09:06:02 +01:00 committed by Eric Engestrom
parent b4cdd0e929
commit 250f423546
2 changed files with 16 additions and 2 deletions

View file

@ -784,7 +784,7 @@
"description": "aco: clamp exponent of 16bit ldexp",
"nominated": true,
"nomination_type": 1,
"resolution": 0,
"resolution": 1,
"main_sha": null,
"because_sha": null,
"notes": null

View file

@ -2823,7 +2823,21 @@ visit_alu_instr(isel_context* ctx, nir_alu_instr* instr)
}
case nir_op_ldexp: {
if (dst.regClass() == v2b) {
emit_vop2_instruction(ctx, instr, aco_opcode::v_ldexp_f16, dst, false);
nir_scalar scalar = nir_get_scalar(&instr->def, 0);
scalar = nir_scalar_chase_alu_src(scalar, 1);
Temp exp;
/* Convert the exponent to 16bit int with saturation. */
if (nir_scalar_is_const(scalar)) {
int16_t clamped = MIN2(MAX2(nir_scalar_as_int(scalar), INT16_MIN), INT16_MAX);
exp = bld.copy(bld.def(v2b), Operand::c16(clamped));
} else {
exp = get_alu_src(ctx, instr->src[1]);
exp = bld.vop3(aco_opcode::v_cvt_pk_i16_i32, bld.def(v2b), exp, Operand::c32(0));
}
bld.vop2(aco_opcode::v_ldexp_f16, Definition(dst), get_alu_src(ctx, instr->src[0]), exp);
} else if (dst.regClass() == v1) {
emit_vop3a_instruction(ctx, instr, aco_opcode::v_ldexp_f32, dst);
} else if (dst.regClass() == v2) {