panfrost: Don't use nir_variable to link varyings

NIR deemphasizes nir_variable. We want to transition off it. Instead of walking the list of variables and playing games with the GLSL types to collect varying information, walk the list of instructions and use the I/O semantics to collect similar information. In addition to avoiding the reliance on nir_variable, this fixes handling of struct varyings under certain circumstances. Such programs are compiled by the GLES3.1 CTS but not used, so without this fix, the affected tests would regress when precompiling. Signed-off-by: Alyssa Rosenzweig <alyssa@collabora.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/19363>
2025-12-23 22:00:13 +01:00 · 2022-10-14 17:49:10 -04:00 · 2022-10-14 17:49:10 -04:00 · 2316b80d77
commit 2316b80d77
parent 93bf7104d0
6 changed files with 201 additions and 123 deletions
--- a/src/panfrost/bifrost/bifrost_compile.c
+++ b/src/panfrost/bifrost/bifrost_compile.c
@ -5295,6 +5295,8 @@ bifrost_compile_shader_nir(nir_shader *nir,
        info->tls_size = nir->scratch_size;
        info->vs.idvs = bi_should_idvs(nir, inputs);
        pan_nir_collect_varyings(nir, info);
        if (info->vs.idvs) {
                bi_compile_variant(nir, inputs, binary, sysval_to_id, info, BI_IDVS_POSITION);
                bi_compile_variant(nir, inputs, binary, sysval_to_id, info, BI_IDVS_VARYING);
--- a/src/panfrost/lib/pan_shader.c
+++ b/src/panfrost/lib/pan_shader.c
@ -42,123 +42,6 @@ GENX(pan_shader_get_compiler_options)(void)
 #endif
 }
 #if PAN_ARCH <= 7
 /*
  * Translate a sized NIR ALU type and a component count into the pipe_format
  * describing a varying with that layout.
  *
  * Each table row covers one NIR type and lists the formats for 1, 2, 3 and 4
  * components in that order. A type without a row yields PIPE_FORMAT_NONE.
  */
 static enum pipe_format
 varying_format(nir_alu_type t, unsigned ncomps)
 {
 /* Expands to one table row: the NIR type followed by its R/RG/RGB/RGBA
  * formats for the given pipe type and bit size.
  */
 #define FORMAT_ROW(ntype, nsz, ptype, psz) \
        { \
                .type = nir_type_ ## ntype ## nsz, \
                .formats = { \
                        PIPE_FORMAT_R ## psz ## _ ## ptype, \
                        PIPE_FORMAT_R ## psz ## G ## psz ## _ ## ptype, \
                        PIPE_FORMAT_R ## psz ## G ## psz ## B ## psz ## _ ## ptype, \
                        PIPE_FORMAT_R ## psz ## G ## psz ## B ## psz  ## A ## psz ## _ ## ptype, \
                } \
        }
        static const struct {
                nir_alu_type type;
                enum pipe_format formats[4];
        } conv[] = {
                FORMAT_ROW(float, 32, FLOAT, 32),
                FORMAT_ROW(int, 32, SINT, 32),
                FORMAT_ROW(uint, 32, UINT, 32),
                FORMAT_ROW(float, 16, FLOAT, 16),
                FORMAT_ROW(int, 16, SINT, 16),
                FORMAT_ROW(uint, 16, UINT, 16),
                FORMAT_ROW(int, 8, SINT, 8),
                FORMAT_ROW(uint, 8, UINT, 8),
                /* Booleans are carried as unsigned integers */
                FORMAT_ROW(bool, 32, UINT, 32),
                FORMAT_ROW(bool, 16, UINT, 16),
                FORMAT_ROW(bool, 8, UINT, 8),
                FORMAT_ROW(bool, 1, UINT, 8),
        };
 #undef FORMAT_ROW
        assert(ncomps > 0 && ncomps <= ARRAY_SIZE(conv[0].formats));
        /* Formats within a row are ordered by component count, starting at 1 */
        const unsigned column = ncomps - 1;
        unsigned row = 0;
        while (row < ARRAY_SIZE(conv)) {
                if (t == conv[row].type)
                        return conv[row].formats[column];
                row++;
        }
        /* No row for this type */
        return PIPE_FORMAT_NONE;
 }
 /*
  * Build the varying table for one direction of a shader's interface by
  * walking its nir_variables.
  *
  * s              shader whose variables are walked
  * varying_mode   variable mode(s) selecting which variables are visited
  * varyings       table to fill, indexed by driver location
  * varying_count  out: one past the highest table index written, 0 if none
  *
  * The walk is done twice: the first pass finds the widest access at each
  * driver location, the second pass derives a format from that width and the
  * variable's type and records it for every slot the variable occupies.
  */
 static void
 collect_varyings(nir_shader *s, nir_variable_mode varying_mode,
                  struct pan_shader_varying *varyings,
                  unsigned *varying_count)
 {
        *varying_count = 0;
        /* Widest component count seen at each driver location */
        unsigned comps[PAN_MAX_VARYINGS] = { 0 };
        nir_foreach_variable_with_modes(var, s, varying_mode) {
                unsigned loc = var->data.driver_location;
                /* comps[] is a fixed-size stack array, so check the location
                 * before indexing it rather than only in the second pass.
                 */
                assert(loc < PAN_MAX_VARYINGS);
                const struct glsl_type *column =
                        glsl_without_array_or_matrix(var->type);
                unsigned chan = glsl_get_components(column);
                /* If we have a fractional location added, we need to increase the size
                 * so it will fit, i.e. a vec3 in YZW requires us to allocate a vec4.
                 * We could do better but this is an edge case as it is, normally
                 * packed varyings will be aligned.
                 */
                chan += var->data.location_frac;
                comps[loc] = MAX2(comps[loc], chan);
        }
        nir_foreach_variable_with_modes(var, s, varying_mode) {
                unsigned loc = var->data.driver_location;
                /* Number of consecutive table entries this variable covers */
                unsigned sz = glsl_count_attribute_slots(var->type, FALSE);
                const struct glsl_type *column =
                        glsl_without_array_or_matrix(var->type);
                enum glsl_base_type base_type = glsl_get_base_type(column);
                unsigned chan = comps[loc];
                /* Start from the unsized base type; the bit size is OR'd in below */
                nir_alu_type type = nir_get_nir_type_for_glsl_base_type(base_type);
                type = nir_alu_type_get_base_type(type);
                /* Can't do type conversion since GLSL IR packs in funny ways */
                if (var->data.interpolation == INTERP_MODE_FLAT)
                        type = nir_type_uint;
                /* Point size is handled specially on Valhall (with malloc
                 * IDVS).. probably though this entire linker should be bypassed
                 * for Valhall in the future.
                 */
                if (PAN_ARCH >= 9 && var->data.location == VARYING_SLOT_PSIZ)
                        continue;
                /* Demote to fp16 where possible. int16 varyings are TODO as the hw
                 * will saturate instead of wrap which is not conformant, so we need to
                 * insert i2i16/u2u16 instructions before the st_vary_32i/32u to get
                 * the intended behaviour.
                 */
                if (type == nir_type_float &&
                    (var->data.precision == GLSL_PRECISION_MEDIUM ||
                     var->data.precision == GLSL_PRECISION_LOW)) {
                        type |= 16;
                } else {
                        type |= 32;
                }
                enum pipe_format format = varying_format(type, chan);
                assert(format != PIPE_FORMAT_NONE);
                /* Unsigned counter: sz is unsigned, avoid a mixed-sign compare */
                for (unsigned c = 0; c < sz; ++c) {
                        assert(loc + c < PAN_MAX_VARYINGS);
                        varyings[loc + c].location = var->data.location + c;
                        varyings[loc + c].format = format;
                }
                *varying_count = MAX2(*varying_count, loc + sz);
        }
 }
 #endif
 #if PAN_ARCH >= 6
 static enum mali_register_file_format
 bifrost_blend_type_from_nir(nir_alu_type nir_type)
@ -249,9 +132,6 @@ GENX(pan_shader_compile)(nir_shader *s,
 #if PAN_ARCH >= 9
                info->varyings.output_count =
                        util_last_bit(s->info.outputs_written >> VARYING_SLOT_VAR0);
 #else
                collect_varyings(s, nir_var_shader_out, info->varyings.output,
                                 &info->varyings.output_count);
 #endif
                break;
        case MESA_SHADER_FRAGMENT:
@ -308,9 +188,6 @@ GENX(pan_shader_compile)(nir_shader *s,
 #if PAN_ARCH >= 9
                info->varyings.output_count =
                        util_last_bit(s->info.outputs_read >> VARYING_SLOT_VAR0);
 #else
                collect_varyings(s, nir_var_shader_in, info->varyings.input,
                                 &info->varyings.input_count);
 #endif
                break;
        default:
--- a/src/panfrost/midgard/midgard_compile.c
+++ b/src/panfrost/midgard/midgard_compile.c
@ -3203,6 +3203,9 @@ midgard_compile_shader_nir(nir_shader *nir,
        NIR_PASS_V(nir, midgard_nir_lower_global_load);
        /* Collect varyings after lowering I/O */
        pan_nir_collect_varyings(nir, info);
        /* Optimisation passes */
        optimise_nir(nir, ctx->quirks, inputs->is_blend, inputs->is_blit);
--- a/src/panfrost/util/meson.build
+++ b/src/panfrost/util/meson.build
@ -23,6 +23,7 @@ libpanfrost_util_files = files(
  'lcra.c',
  'lcra.h',
  'nir_mod_helpers.c',
  'pan_collect_varyings.c',
  'pan_ir.c',
  'pan_ir.h',
  'pan_liveness.c',
--- a/src/panfrost/util/pan_collect_varyings.c
+++ b/src/panfrost/util/pan_collect_varyings.c
@ -0,0 +1,193 @@
 /*
 * Copyright (c) 2022 Amazon.com, Inc. or its affiliates.
 * Copyright (C) 2019-2022 Collabora, Ltd.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
 #include "compiler/nir/nir.h"
 #include "compiler/nir/nir_builder.h"
 #include "pan_ir.h"
 /*
  * Translate a sized NIR ALU type and a component count (1 to 4) into the
  * pipe_format describing a varying with that layout.
  *
  * Only 32-bit float, 32-bit uint and 16-bit float have table rows. Any other
  * type reaches unreachable().
  */
 static enum pipe_format
 varying_format(nir_alu_type t, unsigned ncomps)
 {
 #define VARYING_FORMAT(ntype, nsz, ptype, psz) \
        { \
                .type = nir_type_ ## ntype ## nsz, \
                .formats = { \
                        PIPE_FORMAT_R ## psz ## _ ## ptype, \
                        PIPE_FORMAT_R ## psz ## G ## psz ## _ ## ptype, \
                        PIPE_FORMAT_R ## psz ## G ## psz ## B ## psz ## _ ## ptype, \
                        PIPE_FORMAT_R ## psz ## G ## psz ## B ## psz  ## A ## psz ## _ ## ptype, \
                } \
        }
        static const struct {
                nir_alu_type type;
                enum pipe_format formats[4];
        } conv[] = {
                VARYING_FORMAT(float, 32, FLOAT, 32),
                VARYING_FORMAT(uint, 32, UINT, 32),
                VARYING_FORMAT(float, 16, FLOAT, 16),
        };
 #undef VARYING_FORMAT
        /* Single precondition check, expressed against the table width so it
         * stays correct if the number of formats per row ever changes.
         */
        assert(ncomps > 0 && ncomps <= ARRAY_SIZE(conv[0].formats));
        for (unsigned i = 0; i < ARRAY_SIZE(conv); i++) {
                /* Formats within a row are ordered by component count */
                if (conv[i].type == t)
                        return conv[i].formats[ncomps - 1];
        }
        unreachable("Invalid type");
 }
 /* What the instruction walk has learned about one varying slot. Entries live
  * in an array indexed by the I/O semantic location of the access.
  */
 struct slot_info {
        /* Sized NIR type chosen for the slot; zero means no access seen yet */
        nir_alu_type type;
        /* Largest (component offset + components accessed) over all accesses */
        unsigned count;
        /* Intrinsic base plus slot offset; used as the varying table index */
        unsigned index;
 };
 static bool
 walk_varyings(UNUSED nir_builder *b, nir_instr *instr, void *data)
 {
        struct slot_info *slots = data;
        if (instr->type != nir_instr_type_intrinsic)
                return false;
        nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
        unsigned count;
        /* Only consider intrinsics that access varyings */
        switch (intr->intrinsic) {
        case nir_intrinsic_store_output:
                if (b->shader->info.stage != MESA_SHADER_VERTEX)
                        return false;
                count = nir_src_num_components(intr->src[0]);
                break;
        case nir_intrinsic_load_input:
        case nir_intrinsic_load_interpolated_input:
                if (b->shader->info.stage != MESA_SHADER_FRAGMENT)
                        return false;
                count = nir_dest_num_components(intr->dest);
                break;
        default:
                return false;
        }
        nir_io_semantics sem = nir_intrinsic_io_semantics(intr);
        if (sem.no_varying)
                return false;
        /* In a fragment shader, flat shading is lowered to load_input but
         * interpolation is lowered to load_interpolated_input, so we can check
         * the intrinsic to distinguish.
         *
         * In a vertex shader, we consider everything flat, as the information
         * will not contribute to the final linked varyings -- flatness is used
         * only to determine the type, and the GL linker uses the type from the
         * fragment shader instead.
         */
        bool flat = (intr->intrinsic != nir_intrinsic_load_interpolated_input);
        nir_alu_type type = flat ? nir_type_uint : nir_type_float;
        /* Demote interpolated float varyings to fp16 where possible. We do not
         * demote flat varyings, including integer varyings, due to various
         * issues with the Midgard hardware behaviour and TGSI shaders, as well
         * as having no demonstrable benefit in practice.
         */
        if (type == nir_type_float && sem.medium_precision)
                type |= 16;
        else
                type |= 32;
        /* Count currently contains the number of components accessed by this
         * intrinsics. However, we may be accessing a fractional location,
         * indicating by the NIR component. Add that in. The final value be the
         * maximum (component + count), an upper bound on the number of
         * components possibly used.
         */
        count += nir_intrinsic_component(intr);
        /* Consider each slot separately */
        for (unsigned offset = 0; offset < sem.num_slots; ++offset) {
                unsigned location = sem.location + offset;
                unsigned index = nir_intrinsic_base(intr) + offset;
                if (slots[location].type) {
                        assert(slots[location].type == type);
                        assert(slots[location].index == index);
                } else {
                        slots[location].type = type;
                        slots[location].index = index;
                }
                slots[location].count = MAX2(slots[location].count, count);
        }
        return false;
 }
 /*
  * Collect the varyings of a vertex or fragment shader into info by walking
  * its I/O intrinsics.
  *
  * s     shader to inspect; any stage other than vertex or fragment is ignored
  * info  receives the result: varyings.output / varyings.output_count for a
  *       vertex shader, varyings.input / varyings.input_count for a fragment
  *       shader
  *
  * The count written is one past the highest varying index seen, or 0 when
  * the shader accesses no varyings.
  */
 void
 pan_nir_collect_varyings(nir_shader *s, struct pan_shader_info *info)
 {
        if (s->info.stage != MESA_SHADER_VERTEX &&
            s->info.stage != MESA_SHADER_FRAGMENT)
                return;
        /* Indexed by I/O semantic location; a zero type marks an unused slot.
         * NOTE(review): 64 presumably matches the number of varying slot
         * locations reachable from these stages -- confirm.
         */
        struct slot_info slots[64] = { 0 };
        /* walk_varyings only inspects instructions and fills in slots */
        nir_shader_instructions_pass(s, walk_varyings, nir_metadata_all, slots);
        struct pan_shader_varying *varyings =
                (s->info.stage == MESA_SHADER_VERTEX) ?
                info->varyings.output :
                info->varyings.input;
        unsigned count = 0;
        for (unsigned i = 0; i < ARRAY_SIZE(slots); ++i) {
                if (!slots[i].type)
                        continue;
                enum pipe_format format =
                        varying_format(slots[i].type, slots[i].count);
                assert(format != PIPE_FORMAT_NONE);
                unsigned index = slots[i].index;
                /* The index comes straight from the intrinsic base, so check
                 * it against the varying table bound before writing.
                 */
                assert(index < PAN_MAX_VARYINGS);
                count = MAX2(count, index + 1);
                varyings[index].location = i;
                varyings[index].format = format;
        }
        if (s->info.stage == MESA_SHADER_VERTEX)
                info->varyings.output_count = count;
        else
                info->varyings.input_count = count;
 }
--- a/src/panfrost/util/pan_ir.h
+++ b/src/panfrost/util/pan_ir.h
@ -510,6 +510,8 @@ bool pan_lower_helper_invocation(nir_shader *shader);
 bool pan_lower_sample_pos(nir_shader *shader);
 bool pan_lower_xfb(nir_shader *nir);
 void pan_nir_collect_varyings(nir_shader *s, struct pan_shader_info *info);
 /*
 * Helper returning the subgroup size. Generally, this is equal to the number of
 * threads in a warp. For Midgard (including warping models), this returns 1, as