From aaa858f9581e280eae7fbf39084042e8a9a0b8e1 Mon Sep 17 00:00:00 2001
From: Ella Stanforth
Date: Sun, 29 Jun 2025 13:43:19 +0100
Subject: [PATCH] v3d/compiler: Implement 16bit normalised render targets.

Reviewed-by: Iago Toral Quiroga
Part-of:
---
 src/broadcom/compiler/v3d_compiler.h |   5 ++
 src/broadcom/compiler/vir.c          | 112 +++++++++++++++++++++++++++
 2 files changed, 117 insertions(+)

diff --git a/src/broadcom/compiler/v3d_compiler.h b/src/broadcom/compiler/v3d_compiler.h
index a041eeeb31e..7d2914e553d 100644
--- a/src/broadcom/compiler/v3d_compiler.h
+++ b/src/broadcom/compiler/v3d_compiler.h
@@ -434,6 +434,11 @@ struct v3d_fs_key {
         uint8_t swap_color_rb;
         /* Mask of which render targets need to be written as 32-bit floats */
         uint8_t f32_color_rb;
+        /* Mask of render targets (bit N = RT N) written as 16-bit unorms or snorms */
+        uint8_t norm_16;
+        /* Mask of which render targets need to be written as snorms. */
+        uint8_t snorm;
+
         uint8_t ucp_enables;
 
         /* Color format information per render target. Only set when logic
diff --git a/src/broadcom/compiler/vir.c b/src/broadcom/compiler/vir.c
index 6da21815c8b..8b58f0136ff 100644
--- a/src/broadcom/compiler/vir.c
+++ b/src/broadcom/compiler/vir.c
@@ -25,6 +25,7 @@
 #include "v3d_compiler.h"
 #include "compiler/nir/nir_schedule.h"
 #include "compiler/nir/nir_builder.h"
+#include "compiler/nir/nir_format_convert.h"
 #include "util/perf/cpu_trace.h"
 
 int
@@ -1088,6 +1089,116 @@ v3d_nir_lower_gs_early(struct v3d_compile *c)
         NIR_PASS(_, c->s, nir_opt_constant_folding);
 }
 
+static const unsigned bits_16[] = {16, 16, 16, 16}; /* for nir_format_* */
+
+static nir_def *
+v3d_nir_float_to_snorm_16(nir_builder *b, nir_def *a, bool sw)
+{ /* Float to 16-bit snorm; sw picks the generic helper. */
+        if (sw) /* whole vector handed to the generic helper */
+                return nir_format_float_to_snorm(b, a, bits_16);
+
+        nir_def *out[4]; /* NOTE(review): num_components <= 4 is not checked */
+        for (unsigned i = 0; i < a->num_components; i++)
+                out[i] = nir_f2snorm_16_v3d(b, nir_channel(b, a, i));
+
+        return nir_vec(b, out, a->num_components);
+}
+
+static nir_def *
+v3d_nir_float_to_unorm_16(nir_builder *b, nir_def *a, bool sw)
+{ /* Float to 16-bit unorm; sw picks the generic helper. */
+        if (sw) /* whole vector handed to the generic helper */
+                return nir_format_float_to_unorm(b, a, bits_16);
+
+        nir_def *out[4]; /* NOTE(review): num_components <= 4 is not checked */
+        for (unsigned i = 0; i < a->num_components; i++)
+                out[i] = nir_f2unorm_16_v3d(b, nir_channel(b, a, i));
+
+        return nir_vec(b, out, a->num_components);
+}
+
+static nir_def *
+v3d_nir_unorm_to_float_16(nir_builder *b, nir_def *a, bool sw)
+{ /* 16-bit unorm to float; sw picks the generic helper. */
+        if (sw) /* whole vector handed to the generic helper */
+                return nir_format_unorm_to_float(b, a, bits_16);
+
+        nir_def *out[4]; /* NOTE(review): num_components <= 4 is not checked */
+        for (unsigned i = 0; i < a->num_components; i++)
+                out[i] = nir_unorm2f_16_v3d(b, nir_channel(b, a, i));
+
+        return nir_vec(b, out, a->num_components);
+}
+
+static nir_def *
+v3d_nir_snorm_to_float_16(nir_builder *b, nir_def *a, bool sw)
+{ /* 16-bit snorm to float; sw picks the generic helper. */
+        if (sw) /* whole vector handed to the generic helper */
+                return nir_format_snorm_to_float(b, a, bits_16);
+
+        nir_def *out[4]; /* NOTE(review): num_components <= 4 is not checked */
+        for (unsigned i = 0; i < a->num_components; i++)
+                out[i] = nir_snorm2f_16_v3d(b, nir_channel(b, a, i));
+
+        return nir_vec(b, out, a->num_components);
+}
+
+static bool
+lower_16bit_norm(nir_builder *b, nir_intrinsic_instr *intr, void *data)
+{ /* Per-intrinsic callback; returns true when intr was rewritten. */
+        if (intr->intrinsic != nir_intrinsic_load_output &&
+            intr->intrinsic != nir_intrinsic_store_output)
+                return false;
+
+        nir_io_semantics sem = nir_intrinsic_io_semantics(intr);
+        if (sem.location < FRAG_RESULT_DATA0) /* below the DATA0.. range */
+                return false;
+
+        struct v3d_compile *c = data; /* passed by v3d_nir_lower_16bit_norm */
+
+        unsigned rt = sem.location - FRAG_RESULT_DATA0;
+        bool norm = c->fs_key->norm_16 & (1 << rt);
+
+        if (!norm) /* rt is not flagged as a 16-bit norm target */
+                return false;
+
+        /* We do not have specific instructions for snorm packing on older
+         * hardware. With ver < 71 all conversions below take the sw path.
+         */
+        bool needs_sw = c->devinfo->ver < 71;
+        bool norm_signed = c->fs_key->snorm & (1 << rt); /* else unorm */
+
+        bool is_store = intr->intrinsic == nir_intrinsic_store_output;
+
+        nir_def *dst;
+        if (is_store) { /* float -> norm, inserted before the store */
+                b->cursor = nir_before_instr(&intr->instr);
+                nir_def *src = intr->src[0].ssa;
+                if (norm_signed)
+                        dst = v3d_nir_float_to_snorm_16(b, src, needs_sw);
+                else
+                        dst = v3d_nir_float_to_unorm_16(b, src, needs_sw);
+
+                nir_src_rewrite(&intr->src[0], dst);
+        } else { /* norm -> float, inserted after the load */
+                b->cursor = nir_after_instr(&intr->instr);
+                if (norm_signed)
+                        dst = v3d_nir_snorm_to_float_16(b, &intr->def, needs_sw);
+                else
+                        dst = v3d_nir_unorm_to_float_16(b, &intr->def, needs_sw);
+
+                nir_def_rewrite_uses_after(&intr->def, dst);
+        }
+
+        return true;
+}
+
+static bool
+v3d_nir_lower_16bit_norm(nir_shader *s, struct v3d_compile *c)
+{ /* Applies lower_16bit_norm to s, handing c through as callback data. */
+        return nir_shader_intrinsics_pass(s, lower_16bit_norm, nir_metadata_control_flow, c);
+}
+
 static void
 v3d_nir_lower_fs_early(struct v3d_compile *c)
 {
@@ -1113,6 +1224,7 @@ v3d_nir_lower_fs_early(struct v3d_compile *c)
         }
 
         NIR_PASS(_, c->s, v3d_nir_lower_logic_ops, c);
+        NIR_PASS(_, c->s, v3d_nir_lower_16bit_norm, c);
         NIR_PASS(_, c->s, v3d_nir_lower_load_output, c);
 }
 