v3d/compiler: Implement 16bit normalised render targets.

Reviewed-by: Iago Toral Quiroga <itoral@igalia.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/35820>
2026-03-17 19:00:33 +01:00 · 2025-06-29 13:43:19 +01:00 · 2025-06-29 13:43:19 +01:00 · aaa858f958
commit aaa858f958
parent c9e9d72cce
2 changed files with 117 additions and 0 deletions
--- a/src/broadcom/compiler/v3d_compiler.h
+++ b/src/broadcom/compiler/v3d_compiler.h
@ -434,6 +434,11 @@ struct v3d_fs_key {
        uint8_t swap_color_rb;
        /* Mask of which render targets need to be written as 32-bit floats */
        uint8_t f32_color_rb;
+        /* Mask of which render targets need to be written as 16-bit unorms or snorms */
+        uint8_t norm_16;
+        /* Mask of which render targets need to be written as snorms. */
+        uint8_t snorm;
+
        uint8_t ucp_enables;

        /* Color format information per render target. Only set when logic
--- a/src/broadcom/compiler/vir.c
+++ b/src/broadcom/compiler/vir.c
@ -25,6 +25,7 @@
 #include "v3d_compiler.h"
 #include "compiler/nir/nir_schedule.h"
 #include "compiler/nir/nir_builder.h"
+#include "compiler/nir/nir_format_convert.h"
 #include "util/perf/cpu_trace.h"

 int
@ -1088,6 +1089,116 @@ v3d_nir_lower_gs_early(struct v3d_compile *c)
        NIR_PASS(_, c->s, nir_opt_constant_folding);
 }

+/* Per-channel bit widths passed to the nir_format_* conversion helpers. */
+static const unsigned bits_16[4] = { 16, 16, 16, 16 };
+
+/* Converts the float channels of `a` to 16-bit snorm.
+ *
+ * With `sw` set, the conversion is delegated to nir_format_float_to_snorm()
+ * using 16 bits per channel. Otherwise every channel is converted
+ * individually with nir_f2snorm_16_v3d() and the results are gathered into
+ * a vector with as many components as `a`.
+ */
+static nir_def *
+v3d_nir_float_to_snorm_16(nir_builder *b, nir_def *a, bool sw)
+{
+        nir_def *out[4];
+
+        /* bits_16 and out[] both hold four entries only. */
+        assert(a->num_components <= ARRAY_SIZE(out));
+
+        if (sw)
+                return nir_format_float_to_snorm(b, a, bits_16);
+
+        for (unsigned i = 0; i < a->num_components; i++)
+                out[i] = nir_f2snorm_16_v3d(b, nir_channel(b, a, i));
+
+        return nir_vec(b, out, a->num_components);
+}
+
+/* Converts the float channels of `a` to 16-bit unorm.
+ *
+ * With `sw` set, the conversion is delegated to nir_format_float_to_unorm()
+ * using 16 bits per channel. Otherwise every channel is converted
+ * individually with nir_f2unorm_16_v3d() and the results are gathered into
+ * a vector with as many components as `a`.
+ */
+static nir_def *
+v3d_nir_float_to_unorm_16(nir_builder *b, nir_def *a, bool sw)
+{
+        nir_def *out[4];
+
+        /* bits_16 and out[] both hold four entries only. */
+        assert(a->num_components <= ARRAY_SIZE(out));
+
+        if (sw)
+                return nir_format_float_to_unorm(b, a, bits_16);
+
+        for (unsigned i = 0; i < a->num_components; i++)
+                out[i] = nir_f2unorm_16_v3d(b, nir_channel(b, a, i));
+
+        return nir_vec(b, out, a->num_components);
+}
+
+/* Converts the 16-bit unorm channels of `a` to float.
+ *
+ * With `sw` set, the conversion is delegated to nir_format_unorm_to_float()
+ * using 16 bits per channel. Otherwise every channel is converted
+ * individually with nir_unorm2f_16_v3d() and the results are gathered into
+ * a vector with as many components as `a`.
+ */
+static nir_def *
+v3d_nir_unorm_to_float_16(nir_builder *b, nir_def *a, bool sw)
+{
+        nir_def *out[4];
+
+        /* bits_16 and out[] both hold four entries only. */
+        assert(a->num_components <= ARRAY_SIZE(out));
+
+        if (sw)
+                return nir_format_unorm_to_float(b, a, bits_16);
+
+        for (unsigned i = 0; i < a->num_components; i++)
+                out[i] = nir_unorm2f_16_v3d(b, nir_channel(b, a, i));
+
+        return nir_vec(b, out, a->num_components);
+}
+
+/* Converts the 16-bit snorm channels of `a` to float.
+ *
+ * With `sw` set, the conversion is delegated to nir_format_snorm_to_float()
+ * using 16 bits per channel. Otherwise every channel is converted
+ * individually with nir_snorm2f_16_v3d() and the results are gathered into
+ * a vector with as many components as `a`.
+ */
+static nir_def *
+v3d_nir_snorm_to_float_16(nir_builder *b, nir_def *a, bool sw)
+{
+        nir_def *out[4];
+
+        /* bits_16 and out[] both hold four entries only. */
+        assert(a->num_components <= ARRAY_SIZE(out));
+
+        if (sw)
+                return nir_format_snorm_to_float(b, a, bits_16);
+
+        for (unsigned i = 0; i < a->num_components; i++)
+                out[i] = nir_snorm2f_16_v3d(b, nir_channel(b, a, i));
+
+        return nir_vec(b, out, a->num_components);
+}
+
+/* Per-intrinsic callback used by v3d_nir_lower_16bit_norm().
+ *
+ * Acts on load_output/store_output intrinsics whose location is a render
+ * target flagged in fs_key->norm_16. The value being stored is converted
+ * from float to 16-bit unorm/snorm just before the store; a loaded value is
+ * converted back to float right after the load and later uses are pointed
+ * at the converted value. fs_key->snorm selects the signed variant.
+ *
+ * `data` is the struct v3d_compile of the shader being compiled.
+ * Returns true when the intrinsic was rewritten, false when it was left
+ * untouched.
+ */
+static bool
+lower_16bit_norm(nir_builder *b, nir_intrinsic_instr *intr, void *data)
+{
+        if (intr->intrinsic != nir_intrinsic_load_output &&
+            intr->intrinsic != nir_intrinsic_store_output)
+                return false;
+
+        /* The key masks are 8 bits wide, one bit per FRAG_RESULT_DATAn.
+         * Rejecting every other location keeps the shift below in range.
+         */
+        nir_io_semantics sem = nir_intrinsic_io_semantics(intr);
+        if (sem.location < FRAG_RESULT_DATA0 ||
+            sem.location > FRAG_RESULT_DATA7)
+                return false;
+
+        struct v3d_compile *c = data;
+
+        const unsigned rt = sem.location - FRAG_RESULT_DATA0;
+        const unsigned rt_bit = 1u << rt;
+
+        if (!(c->fs_key->norm_16 & rt_bit))
+                return false;
+
+        /* The *_16_v3d conversion opcodes are only emitted when
+         * devinfo->ver >= 71. Earlier versions use the generic
+         * nir_format_* lowering, for unorm and snorm alike.
+         */
+        const bool needs_sw = c->devinfo->ver < 71;
+        const bool norm_signed = c->fs_key->snorm & rt_bit;
+
+        nir_def *dst;
+        if (intr->intrinsic == nir_intrinsic_store_output) {
+                /* Pack the value before it reaches the store. */
+                b->cursor = nir_before_instr(&intr->instr);
+                nir_def *src = intr->src[0].ssa;
+                if (norm_signed)
+                        dst = v3d_nir_float_to_snorm_16(b, src, needs_sw);
+                else
+                        dst = v3d_nir_float_to_unorm_16(b, src, needs_sw);
+
+                nir_src_rewrite(&intr->src[0], dst);
+        } else {
+                /* Unpack the loaded value right after the load. */
+                b->cursor = nir_after_instr(&intr->instr);
+                if (norm_signed)
+                        dst = v3d_nir_snorm_to_float_16(b, &intr->def, needs_sw);
+                else
+                        dst = v3d_nir_unorm_to_float_16(b, &intr->def, needs_sw);
+
+                nir_def_rewrite_uses_after(&intr->def, dst);
+        }
+
+        return true;
+}
+
+/* Applies lower_16bit_norm() to the shader's intrinsics, handing the compile
+ * context `c` to the callback. Returns what nir_shader_intrinsics_pass()
+ * reports.
+ */
+static bool
+v3d_nir_lower_16bit_norm(nir_shader *s, struct v3d_compile *c)
+{
+        const bool progress =
+                nir_shader_intrinsics_pass(s, lower_16bit_norm,
+                                           nir_metadata_control_flow, c);
+
+        return progress;
+}
+
 static void
 v3d_nir_lower_fs_early(struct v3d_compile *c)
 {
@ -1113,6 +1224,7 @@ v3d_nir_lower_fs_early(struct v3d_compile *c)
        }

        NIR_PASS(_, c->s, v3d_nir_lower_logic_ops, c);
+        NIR_PASS(_, c->s, v3d_nir_lower_16bit_norm, c);
        NIR_PASS(_, c->s, v3d_nir_lower_load_output, c);
 }