pan/bi: Stop using V2F32_TO_V2F16 on Valhall
On v11+, V2F32_TO_V2F16 doesn't exist anymore. This commit ensures we
stop using it on every codepath, except when a vectorized conversion is
preferred (v9-v10). Instead, we use FADD.F32 to handle the data
conversion, thanks to the swizzle defined for the destination. This
also works on older Valhall gens, so let's follow that logic when only
one component is used.

Signed-off-by: Mary Guillemard <mary.guillemard@collabora.com>
Reviewed-by: Lars-Ivar Hesselberg Simonsen <lars-ivar.simonsen@arm.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/33608>
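For readers skimming the diff, the destination-swizzle trick the message refers to looks like this. A minimal sketch using the builder helpers that appear below (bi_temp, bi_half, bi_fadd_f32_to, bi_imm_u32); it illustrates the idea and is not an excerpt from the commit:

   /* On Valhall (v9+), an F32 op that writes only a 16-bit half of its
    * destination converts the result to F16 on write-out. Adding +0.0f
    * (bits 0x0) thus turns FADD.F32 into a pure F32 -> F16 conversion. */
   bi_index dest = bi_half(bi_temp(b->shader), false); /* write .h0 only */
   bi_instr *cvt = bi_fadd_f32_to(b, dest, src, bi_imm_u32(0));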
This commit is contained in:
parent 947264e18a
commit b63ef74e73

1 changed file with 57 additions and 8 deletions
@@ -278,6 +278,19 @@ bi_collect_v2i32(bi_builder *b, bi_index s0, bi_index s1)
    return dst;
 }
 
+static inline bi_instr *
+bi_f32_to_f16_to(bi_builder *b, bi_index dest, bi_index src)
+{
+   /* Use V2F32_TO_V2F16 on Bifrost, FADD otherwise */
+   if (b->shader->arch < 9)
+      return bi_v2f32_to_v2f16_to(b, dest, src, src);
+
+   assert(dest.swizzle != BI_SWIZZLE_H01);
+
+   /* FADD with 0 and force conversion to F16 on Valhall and later */
+   return bi_fadd_f32_to(b, dest, src, bi_imm_u32(0));
+}
+
 static bi_index
 bi_varying_src0_for_barycentric(bi_builder *b, nir_intrinsic_instr *intr)
 {
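The assert documents that on v9+ this helper can only produce a single F16 half: the destination must carry an .h0 or .h1 swizzle. A quick illustration (mine, not part of the change) of what callers may pass:

   bi_index lo  = bi_half(bi_temp(b->shader), false); /* .h0: accepted */
   bi_index hi  = bi_half(bi_temp(b->shader), true);  /* .h1: accepted */
   bi_index all = bi_temp(b->shader); /* defaults to .h01: trips the assert;
                                         a packed write needs V2F32_TO_V2F16 */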
@@ -319,7 +332,19 @@ bi_varying_src0_for_barycentric(bi_builder *b, nir_intrinsic_instr *intr)
                              bi_imm_u32(8), BI_SPECIAL_NONE);
       }
 
-      f16 = bi_v2f32_to_v2f16(b, f[0], f[1]);
+      /* On v11+, V2F32_TO_V2F16 is gone */
+      if (b->shader->arch >= 11) {
+         bi_index tmp[2];
+
+         for (int i = 0; i < 2; i++) {
+            tmp[i] = bi_half(bi_temp(b->shader), false);
+            bi_f32_to_f16_to(b, tmp[i], f[i]);
+         }
+
+         f16 = bi_mkvec_v2i16(b, tmp[0], tmp[1]);
+      } else {
+         f16 = bi_v2f32_to_v2f16(b, f[0], f[1]);
+      }
    }
 
    return bi_v2f16_to_v2s16(b, f16);
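The v11+ branch above is the general recipe for replacing the packed conversion: convert each component into its own half-register, then pack with MKVEC.v2i16. In pseudo-assembly the loop amounts to something like this (illustrative mnemonics, not verified disassembly):

   FADD.f32    t0.h0, f0, #0x0      // F32 -> F16 into the low half
   FADD.f32    t1.h0, f1, #0x0
   MKVEC.v2i16 f16, t0.h0, t1.h0    // pack the two halves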
@@ -2651,13 +2676,26 @@ bi_emit_alu(bi_builder *b, nir_alu_instr *instr)
    case nir_op_f2f16:
    case nir_op_f2f16_rtz:
    case nir_op_f2f16_rtne: {
+      /* Starting with v11, we don't have V2XXX_TO_V2F16, this should have been
+       * lowered before if there is more than one component */
+      assert(b->shader->arch < 11 || comps == 1);
       assert(src_sz == 32);
       bi_index idx = bi_src_index(&instr->src[0].src);
       bi_index s0 = bi_extract(b, idx, instr->src[0].swizzle[0]);
-      bi_index s1 =
-         comps > 1 ? bi_extract(b, idx, instr->src[0].swizzle[1]) : s0;
+      bi_instr *I;
 
-      bi_instr *I = bi_v2f32_to_v2f16_to(b, dst, s0, s1);
+      /* Use V2F32_TO_V2F16 if vectorized */
+      if (comps == 2) {
+         /* Starting with v11, we don't have V2F32_TO_V2F16, this should have
+          * been lowered before if there is more than one component */
+         assert(b->shader->arch < 11);
+         bi_index s1 = bi_extract(b, idx, instr->src[0].swizzle[1]);
+         I = bi_v2f32_to_v2f16_to(b, dst, s0, s1);
+      } else {
+         assert(comps == 1);
+         I = bi_f32_to_f16_to(b, dst, s0);
+      }
 
       /* Override rounding if explicitly requested. Otherwise, the
        * default rounding mode is selected by the builder. Depending
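The trailing context refers to the pre-existing rounding override. For readers without the full file, it has roughly this shape (sketch; the exact enum value used for nearest-even is an assumption):

   if (instr->op == nir_op_f2f16_rtz)
      I->round = BI_ROUND_RTZ;
   else if (instr->op == nir_op_f2f16_rtne)
      I->round = BI_ROUND_NONE; /* builder default, round to nearest even */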
@@ -2952,7 +2990,8 @@ bi_emit_alu(bi_builder *b, nir_alu_instr *instr)
       break;
 
    case nir_op_fquantize2f16: {
-      bi_instr *f16 = bi_v2f32_to_v2f16_to(b, bi_temp(b->shader), s0, s0);
+      bi_instr *f16 =
+         bi_f32_to_f16_to(b, bi_half(bi_temp(b->shader), false), s0);
 
       if (b->shader->arch < 9) {
          /* Bifrost has pseudo-ftz on conversions, that is lowered to an ftz
@@ -2961,11 +3000,11 @@ bi_emit_alu(bi_builder *b, nir_alu_instr *instr)
       } else {
          /* Valhall doesn't have clauses, and uses a separate flush
           * instruction */
-         f16 = bi_flush_to(b, 16, bi_temp(b->shader), f16->dest[0]);
+         f16 = bi_flush_to(b, 16, bi_half(bi_temp(b->shader), false), f16->dest[0]);
          f16->ftz = true;
       }
 
-      bi_instr *f32 = bi_f16_to_f32_to(b, dst, bi_half(f16->dest[0], false));
+      bi_instr *f32 = bi_f16_to_f32_to(b, dst, f16->dest[0]);
 
       if (b->shader->arch < 9)
         f32->ftz = true;
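Taken together, the v9+ lowering of fquantize2f16 is now convert, flush, convert back, with denormal flushing done by the dedicated FLUSH instruction rather than by clause-level ftz as on Bifrost. Schematically (illustrative mnemonics; the real encodings come from the builders above):

   FADD.f32   t.h0, s0, #0x0   // F32 -> F16 via the .h0 destination swizzle
   FLUSH.f16  u.h0, t.h0       // flush F16 denormals to zero (f16->ftz)
   F16_TO_F32 dst, u.h0        // widen back to F32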
@@ -4797,6 +4836,8 @@ bi_lower_bit_size(const nir_instr *instr, UNUSED void *data)
 static uint8_t
 bi_vectorize_filter(const nir_instr *instr, const void *data)
 {
+   unsigned gpu_id = *((unsigned *)data);
+
    /* Defaults work for everything else */
    if (instr->type != nir_instr_type_alu)
       return 0;
@@ -4817,6 +4858,14 @@ bi_vectorize_filter(const nir_instr *instr, const void *data)
    case nir_op_extract_i16:
    case nir_op_insert_u16:
       return 1;
+   /* On v11+, we lost all packed F16 conversions */
+   case nir_op_f2f16:
+   case nir_op_f2f16_rtz:
+   case nir_op_f2f16_rtne:
+      if (pan_arch(gpu_id) >= 11)
+         return 1;
+
+      break;
    default:
       break;
    }
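The filter's return value is the width nir_opt_vectorize may build, so returning 1 keeps the f2f16 family scalar on v11+, which is what lets bi_emit_alu assert comps == 1 there. The effect is easiest to see at the NIR level (schematic, not actual nir_print output):

   vec1 16 ssa_2 = f2f16 ssa_0.x   // stays scalar on v11+
   vec1 16 ssa_3 = f2f16 ssa_0.y
   // rather than: vec2 16 ssa_2 = f2f16 ssa_0.xy  (needs V2F32_TO_V2F16)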
@@ -5041,7 +5090,7 @@ bi_optimize_nir(nir_shader *nir, unsigned gpu_id, bool is_blend)
       NIR_PASS(progress, nir, bifrost_nir_opt_boolean_bitwise);
 
       NIR_PASS(progress, nir, nir_lower_alu_to_scalar, bi_scalarize_filter, NULL);
-      NIR_PASS(progress, nir, nir_opt_vectorize, bi_vectorize_filter, NULL);
+      NIR_PASS(progress, nir, nir_opt_vectorize, bi_vectorize_filter, &gpu_id);
       NIR_PASS(progress, nir, nir_lower_bool_to_bitsize);
 
       /* Prepass to simplify instruction selection */