pan/compiler: Replace bi_lower_ldexp16 with algebraic pass

Signed-off-by: Lorenzo Rossi <lorenzo.rossi@collabora.com>
Reviewed-by: Faith Ekstrand <faith.ekstrand@collabora.com>
Reviewed-by: Christoph Pillmayer <christoph.pillmayer@arm.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/40717>
This commit is contained in:
Lorenzo Rossi 2026-03-30 10:47:54 +02:00 committed by Marge Bot
parent 4fe64d23b8
commit f19b9eddb6
2 changed files with 23 additions and 46 deletions

View file

@ -5972,49 +5972,6 @@ bifrost_nir_lower_load_output(nir_shader *nir)
nir_metadata_control_flow, NULL);
}
/* Bifrost LDEXP.v2f16 takes i16 exponent, while nir_op_ldexp takes i32. Lower
 * to nir_op_ldexp16_pan.
 *
 * Callback for nir_shader_alu_pass: returns true when `alu` was a 16-bit
 * nir_op_ldexp and has been replaced, false when it was left untouched. */
static bool
bi_lower_ldexp16(nir_builder *b, nir_alu_instr *alu, UNUSED void *data)
{
   if (alu->op != nir_op_ldexp || alu->def.bit_size != 16)
      return false;

   b->cursor = nir_before_instr(&alu->instr);

   nir_def *x = nir_ssa_for_alu_src(b, alu, 0);
   nir_def *exp32 = nir_ssa_for_alu_src(b, alu, 1);

   /* From the GLSL 4.60 spec (section 8.3):
    *
    *    "If exp is greater than +128 (single-precision) or +1024
    *    (double-precision), the value returned is undefined. If exp is less
    *    than -126 (single-precision) or -1022 (double-precision), the value
    *    returned may be flushed to zero."
    *
    * So we can't just truncate the exponent. Overflow is undefined behavior,
    * but we need to return signed zero on underflow. If exp32 < INT16_MIN, we
    * can use any 16-bit exponent that's sufficiently small to send all f16
    * values to zero.
    *
    * If we test exp32 < INT16_MIN directly, the comparison could not be
    * vectorized, so instead we work on the two 16-bit halves. exp32 fits in
    * an i16 exactly when its upper half equals the sign-extension of its
    * lower half (low >> 15, i.e. 0 or -1). It is below INT16_MIN exactly when
    * the upper half is strictly less than that sign-extension.
    *
    * Comparing the upper half against the constant -1 is not enough: for
    * exp32 in [-65536, -32769] the upper half is exactly -1 while the
    * truncated lower half is a non-negative exponent, which would scale the
    * value up (or leave it unchanged) instead of flushing it to zero.
    */
   nir_def *exp16_low = nir_i2i16(b, exp32);
   nir_def *exp16_high = nir_unpack_32_2x16_split_y(b, exp32);
   nir_def *exp16_sign = nir_ishr_imm(b, exp16_low, 15);
   nir_def *underflow = nir_ilt16(b, exp16_high, exp16_sign);

   /* TODO: some possible values for this constant can be encoded as small
    * immediates on valhall. None of the usable immediates have replicated i16
    * lanes, but for example 0xFAFCFDFE would be {-1284,-514}, both of which
    * are small enough. */
   nir_def *min_exp = nir_imm_intN_t(b, -127, 16);
   nir_def *exp16 = nir_b16csel(b, underflow, min_exp, exp16_low);

   nir_def_replace(&alu->def, nir_ldexp16_pan(b, x, exp16));
   return true;
}
void
bifrost_preprocess_nir(nir_shader *nir, unsigned gpu_id)
{
@ -6171,9 +6128,6 @@ bifrost_postprocess_nir(nir_shader *nir, unsigned gpu_id)
NIR_PASS(_, nir, nir_lower_idiv,
&(nir_lower_idiv_options){.allow_fp16 = true});
NIR_PASS(_, nir, nir_shader_alu_pass, bi_lower_ldexp16,
nir_metadata_control_flow, NULL);
NIR_PASS(_, nir, nir_lower_alu_width, bi_vectorize_filter, &gpu_id);
NIR_PASS(_, nir, nir_lower_load_const_to_scalar);
NIR_PASS(_, nir, nir_lower_phis_to_scalar, bi_vectorize_filter, &gpu_id);

View file

@ -122,6 +122,29 @@ for bsz in [8, 16, 32]:
((f'b2f{fsz}', f'a@{bsz}'), (f'b{fsz}csel', a_fsz, 1.0, 0.0)),
]
# Bifrost LDEXP.v2f16 takes i16 exponent, while nir_op_ldexp takes i32. Lower
# to nir_op_ldexp16_pan.
#
# From the GLSL 4.60 spec (section 8.3):
#    "If exp is greater than +128 (single-precision) or +1024
#    (double-precision), the value returned is undefined. If exp is less
#    than -126 (single-precision) or -1022 (double-precision), the value
#    returned may be flushed to zero."
#
# So we can't just truncate the exponent. Overflow is undefined behavior, but
# we need to return signed zero on underflow. If exp32 < INT16_MIN, we can use
# any 16-bit exponent that's sufficiently small to send all f16 values to zero.
#
# If we test exp32 < INT16_MIN directly, the comparison could not be
# vectorized, so instead we work on the two 16-bit halves: exp32 is below
# INT16_MIN exactly when its upper half is strictly less than the
# sign-extension of its lower half (low >> 15, i.e. 0 or -1).
#
# Comparing the upper half against the constant -1 is not enough: for exp32 in
# [-65536, -32769] the upper half is exactly -1 while the truncated lower half
# is a non-negative exponent, so the value would be scaled up (or left
# unchanged) instead of being flushed to zero.
#
# TODO: some possible values for -127 can be encoded as small
# immediates on valhall. None of the usable immediates have replicated
# i16 lanes, but for example 0xFAFCFDFE would be {-1284,-514}, both of
# which are small enough.
algebraic_late += [
    (('ldexp', 'a@16', b),
     ('ldexp16_pan', a,
      ('b16csel',
       ('ilt16', ('unpack_32_2x16_split_y', b), ('ishr', ('i2i16', b), 15)),
       -127,
       ('i2i16', b)))),
]
def main():
parser = argparse.ArgumentParser()