mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-01-11 01:30:25 +01:00
panfrost: fix large int32->float16 conversions
On vulkan, truncating to S/U16 before converting is not valid, because out-of-range conversions are specified to be correctly rounded. IEEE 754 requires that out-of-range values round to ±inf with RTNE and ±F16_MAX with RTZ. On gl, truncating is valid for U16->F16, because out-of-range int->float conversions are undefined behavior. For S16->F16, it is not valid because S16_MAX < F16_MAX, so some in-range values will be truncated as well. Instead, just handle S/U16->F16 as S/U16->F32->F16. Fixes dEQP-VK.spirv_assembly.instruction.compute.convertstof.int32_to_float16_* when shaderFloat16 is enabled in panvk. Signed-off-by: Benjamin Lee <benjamin.lee@collabora.com> Fixes:be74b84e6f("pan/bi: Fill in some more conversions") Reviewed-by: Boris Brezillon <boris.brezillon@collabora.com> Acked-by: Rebecca Mckeever <rebecca.mckeever@collabora.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/33637> (cherry picked from commita33cd3def2)
This commit is contained in:
parent
919e3443e8
commit
91c473e49a
3 changed files with 28 additions and 22 deletions
|
|
@ -54,7 +54,7 @@
|
|||
"description": "panfrost: fix large int32->float16 conversions",
|
||||
"nominated": true,
|
||||
"nomination_type": 2,
|
||||
"resolution": 0,
|
||||
"resolution": 1,
|
||||
"main_sha": null,
|
||||
"because_sha": "be74b84e6f81c6a75e8488c0757ac7f911f4931d",
|
||||
"notes": null
|
||||
|
|
|
|||
|
|
@ -2652,13 +2652,12 @@ bi_emit_alu(bi_builder *b, nir_alu_instr *instr)
|
|||
return;
|
||||
}
|
||||
|
||||
/* While we do not have a direct V2U32_TO_V2F16 instruction, lowering to
|
||||
* MKVEC.v2i16 + V2U16_TO_V2F16 is more efficient on Bifrost than
|
||||
* scalarizing due to scheduling (equal cost on Valhall). Additionally
|
||||
* if the source is replicated the MKVEC.v2i16 can be optimized out.
|
||||
*/
|
||||
case nir_op_u2f16:
|
||||
case nir_op_i2f16: {
|
||||
/* Pre-v11, we can get vector i2f32 by lowering 32-bit vector i2f16 to
|
||||
* i2f32 + f2f16 in bifrost_nir_lower_algebraic_late, which runs after
|
||||
* nir_opt_vectorize. We don't scalarize i2f32 earlier because we have
|
||||
* vector V2F16_TO_V2F32. */
|
||||
case nir_op_i2f32:
|
||||
case nir_op_u2f32: {
|
||||
if (!(src_sz == 32 && comps == 2))
|
||||
break;
|
||||
|
||||
|
|
@ -2667,15 +2666,16 @@ bi_emit_alu(bi_builder *b, nir_alu_instr *instr)
|
|||
bi_index s0 = bi_extract(b, idx, src->swizzle[0]);
|
||||
bi_index s1 = bi_extract(b, idx, src->swizzle[1]);
|
||||
|
||||
bi_index t =
|
||||
(src->swizzle[0] == src->swizzle[1])
|
||||
? bi_half(s0, false)
|
||||
: bi_mkvec_v2i16(b, bi_half(s0, false), bi_half(s1, false));
|
||||
bi_index d0, d1;
|
||||
if (instr->op == nir_op_i2f32) {
|
||||
d0 = bi_s32_to_f32(b, s0);
|
||||
d1 = bi_s32_to_f32(b, s1);
|
||||
} else {
|
||||
d0 = bi_u32_to_f32(b, s0);
|
||||
d1 = bi_u32_to_f32(b, s1);
|
||||
}
|
||||
|
||||
if (instr->op == nir_op_u2f16)
|
||||
bi_v2u16_to_v2f16_to(b, dst, t);
|
||||
else
|
||||
bi_v2s16_to_v2f16_to(b, dst, t);
|
||||
bi_collect_v2i32_to(b, dst, d0, d1);
|
||||
|
||||
return;
|
||||
}
|
||||
|
|
@ -2969,9 +2969,10 @@ bi_emit_alu(bi_builder *b, nir_alu_instr *instr)
|
|||
break;
|
||||
|
||||
case nir_op_u2f16:
|
||||
if (src_sz == 32)
|
||||
bi_v2u16_to_v2f16_to(b, dst, bi_half(s0, false));
|
||||
else if (src_sz == 16)
|
||||
/* V2I32_TO_V2F16 does not exist */
|
||||
assert((src_sz == 16 || src_sz == 8) && "should be lowered");
|
||||
|
||||
if (src_sz == 16)
|
||||
bi_v2u16_to_v2f16_to(b, dst, s0);
|
||||
else if (src_sz == 8)
|
||||
bi_v2u8_to_v2f16_to(b, dst, s0);
|
||||
|
|
@ -2987,9 +2988,10 @@ bi_emit_alu(bi_builder *b, nir_alu_instr *instr)
|
|||
break;
|
||||
|
||||
case nir_op_i2f16:
|
||||
if (src_sz == 32)
|
||||
bi_v2s16_to_v2f16_to(b, dst, bi_half(s0, false));
|
||||
else if (src_sz == 16)
|
||||
/* V2I32_TO_V2F16 does not exist */
|
||||
assert((src_sz == 16 || src_sz == 8) && "should be lowered");
|
||||
|
||||
if (src_sz == 16)
|
||||
bi_v2s16_to_v2f16_to(b, dst, s0);
|
||||
else if (src_sz == 8)
|
||||
bi_v2s8_to_v2f16_to(b, dst, s0);
|
||||
|
|
|
|||
|
|
@ -75,6 +75,10 @@ algebraic_late = [
|
|||
# XXX: Duplicate of nir_lower_pack
|
||||
(('unpack_64_2x32', a), ('vec2', ('unpack_64_2x32_split_x', a),
|
||||
('unpack_64_2x32_split_y', a))),
|
||||
|
||||
# We don't have S32_TO_F16 on any arch
|
||||
(('i2f16', 'a@32'), ('f2f16', ('i2f32', a))),
|
||||
(('u2f16', 'a@32'), ('f2f16', ('u2f32', a))),
|
||||
]
|
||||
|
||||
# Handling all combinations of boolean and float sizes for b2f is nontrivial.
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue