diff --git a/src/panfrost/bifrost/bifrost_compile.c b/src/panfrost/bifrost/bifrost_compile.c index d63f45b5afa..f0aab763e38 100644 --- a/src/panfrost/bifrost/bifrost_compile.c +++ b/src/panfrost/bifrost/bifrost_compile.c @@ -5295,6 +5295,8 @@ bifrost_compile_shader_nir(nir_shader *nir, info->tls_size = nir->scratch_size; info->vs.idvs = bi_should_idvs(nir, inputs); + pan_nir_collect_varyings(nir, info); + if (info->vs.idvs) { bi_compile_variant(nir, inputs, binary, sysval_to_id, info, BI_IDVS_POSITION); bi_compile_variant(nir, inputs, binary, sysval_to_id, info, BI_IDVS_VARYING); diff --git a/src/panfrost/lib/pan_shader.c b/src/panfrost/lib/pan_shader.c index 0158291cdb3..73c00befe76 100644 --- a/src/panfrost/lib/pan_shader.c +++ b/src/panfrost/lib/pan_shader.c @@ -42,123 +42,6 @@ GENX(pan_shader_get_compiler_options)(void) #endif } -#if PAN_ARCH <= 7 -static enum pipe_format -varying_format(nir_alu_type t, unsigned ncomps) -{ -#define VARYING_FORMAT(ntype, nsz, ptype, psz) \ - { \ - .type = nir_type_ ## ntype ## nsz, \ - .formats = { \ - PIPE_FORMAT_R ## psz ## _ ## ptype, \ - PIPE_FORMAT_R ## psz ## G ## psz ## _ ## ptype, \ - PIPE_FORMAT_R ## psz ## G ## psz ## B ## psz ## _ ## ptype, \ - PIPE_FORMAT_R ## psz ## G ## psz ## B ## psz ## A ## psz ## _ ## ptype, \ - } \ - } - - static const struct { - nir_alu_type type; - enum pipe_format formats[4]; - } conv[] = { - VARYING_FORMAT(float, 32, FLOAT, 32), - VARYING_FORMAT(int, 32, SINT, 32), - VARYING_FORMAT(uint, 32, UINT, 32), - VARYING_FORMAT(float, 16, FLOAT, 16), - VARYING_FORMAT(int, 16, SINT, 16), - VARYING_FORMAT(uint, 16, UINT, 16), - VARYING_FORMAT(int, 8, SINT, 8), - VARYING_FORMAT(uint, 8, UINT, 8), - VARYING_FORMAT(bool, 32, UINT, 32), - VARYING_FORMAT(bool, 16, UINT, 16), - VARYING_FORMAT(bool, 8, UINT, 8), - VARYING_FORMAT(bool, 1, UINT, 8), - }; -#undef VARYING_FORMAT - - assert(ncomps > 0 && ncomps <= ARRAY_SIZE(conv[0].formats)); - - for (unsigned i = 0; i < ARRAY_SIZE(conv); i++) { - if 
(conv[i].type == t) - return conv[i].formats[ncomps - 1]; - } - - return PIPE_FORMAT_NONE; -} - -static void -collect_varyings(nir_shader *s, nir_variable_mode varying_mode, - struct pan_shader_varying *varyings, - unsigned *varying_count) -{ - *varying_count = 0; - - unsigned comps[PAN_MAX_VARYINGS] = { 0 }; - - nir_foreach_variable_with_modes(var, s, varying_mode) { - unsigned loc = var->data.driver_location; - const struct glsl_type *column = - glsl_without_array_or_matrix(var->type); - unsigned chan = glsl_get_components(column); - - /* If we have a fractional location added, we need to increase the size - * so it will fit, i.e. a vec3 in YZW requires us to allocate a vec4. - * We could do better but this is an edge case as it is, normally - * packed varyings will be aligned. - */ - chan += var->data.location_frac; - comps[loc] = MAX2(comps[loc], chan); - } - - nir_foreach_variable_with_modes(var, s, varying_mode) { - unsigned loc = var->data.driver_location; - unsigned sz = glsl_count_attribute_slots(var->type, FALSE); - const struct glsl_type *column = - glsl_without_array_or_matrix(var->type); - enum glsl_base_type base_type = glsl_get_base_type(column); - unsigned chan = comps[loc]; - - nir_alu_type type = nir_get_nir_type_for_glsl_base_type(base_type); - type = nir_alu_type_get_base_type(type); - - /* Can't do type conversion since GLSL IR packs in funny ways */ - if (var->data.interpolation == INTERP_MODE_FLAT) - type = nir_type_uint; - - /* Point size is handled specially on Valhall (with malloc - * IDVS).. probably though this entire linker should be bypassed - * for Valhall in the future. - */ - if (PAN_ARCH >= 9 && var->data.location == VARYING_SLOT_PSIZ) - continue; - - /* Demote to fp16 where possible. int16 varyings are TODO as the hw - * will saturate instead of wrap which is not conformant, so we need to - * insert i2i16/u2u16 instructions before the st_vary_32i/32u to get - * the intended behaviour. 
- */ - if (type == nir_type_float && - (var->data.precision == GLSL_PRECISION_MEDIUM || - var->data.precision == GLSL_PRECISION_LOW)) { - type |= 16; - } else { - type |= 32; - } - - enum pipe_format format = varying_format(type, chan); - assert(format != PIPE_FORMAT_NONE); - - for (int c = 0; c < sz; ++c) { - assert(loc + c < PAN_MAX_VARYINGS); - varyings[loc + c].location = var->data.location + c; - varyings[loc + c].format = format; - } - - *varying_count = MAX2(*varying_count, loc + sz); - } -} -#endif - #if PAN_ARCH >= 6 static enum mali_register_file_format bifrost_blend_type_from_nir(nir_alu_type nir_type) @@ -249,9 +132,6 @@ GENX(pan_shader_compile)(nir_shader *s, #if PAN_ARCH >= 9 info->varyings.output_count = util_last_bit(s->info.outputs_written >> VARYING_SLOT_VAR0); -#else - collect_varyings(s, nir_var_shader_out, info->varyings.output, - &info->varyings.output_count); #endif break; case MESA_SHADER_FRAGMENT: @@ -308,9 +188,6 @@ GENX(pan_shader_compile)(nir_shader *s, #if PAN_ARCH >= 9 info->varyings.output_count = util_last_bit(s->info.outputs_read >> VARYING_SLOT_VAR0); -#else - collect_varyings(s, nir_var_shader_in, info->varyings.input, - &info->varyings.input_count); #endif break; default: diff --git a/src/panfrost/midgard/midgard_compile.c b/src/panfrost/midgard/midgard_compile.c index 2f64d71a69d..940a98b75e0 100644 --- a/src/panfrost/midgard/midgard_compile.c +++ b/src/panfrost/midgard/midgard_compile.c @@ -3203,6 +3203,9 @@ midgard_compile_shader_nir(nir_shader *nir, NIR_PASS_V(nir, midgard_nir_lower_global_load); + /* Collect varyings after lowering I/O */ + pan_nir_collect_varyings(nir, info); + /* Optimisation passes */ optimise_nir(nir, ctx->quirks, inputs->is_blend, inputs->is_blit); diff --git a/src/panfrost/util/meson.build b/src/panfrost/util/meson.build index 3e13bb5f8ac..1112ae56e39 100644 --- a/src/panfrost/util/meson.build +++ b/src/panfrost/util/meson.build @@ -23,6 +23,7 @@ libpanfrost_util_files = files( 'lcra.c', 'lcra.h', 
'nir_mod_helpers.c', + 'pan_collect_varyings.c', 'pan_ir.c', 'pan_ir.h', 'pan_liveness.c', diff --git a/src/panfrost/util/pan_collect_varyings.c b/src/panfrost/util/pan_collect_varyings.c new file mode 100644 index 00000000000..6aa85b7089f --- /dev/null +++ b/src/panfrost/util/pan_collect_varyings.c @@ -0,0 +1,193 @@ +/* + * Copyright (c) 2022 Amazon.com, Inc. or its affiliates. + * Copyright (C) 2019-2022 Collabora, Ltd. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include "compiler/nir/nir.h" +#include "compiler/nir/nir_builder.h" +#include "pan_ir.h" + +static enum pipe_format +varying_format(nir_alu_type t, unsigned ncomps) +{ + assert(ncomps >= 1 && ncomps <= 4); + +#define VARYING_FORMAT(ntype, nsz, ptype, psz) \ + { \ + .type = nir_type_ ## ntype ## nsz, \ + .formats = { \ + PIPE_FORMAT_R ## psz ## _ ## ptype, \ + PIPE_FORMAT_R ## psz ## G ## psz ## _ ## ptype, \ + PIPE_FORMAT_R ## psz ## G ## psz ## B ## psz ## _ ## ptype, \ + PIPE_FORMAT_R ## psz ## G ## psz ## B ## psz ## A ## psz ## _ ## ptype, \ + } \ + } + + static const struct { + nir_alu_type type; + enum pipe_format formats[4]; + } conv[] = { + VARYING_FORMAT(float, 32, FLOAT, 32), + VARYING_FORMAT(uint, 32, UINT, 32), + VARYING_FORMAT(float, 16, FLOAT, 16), + }; +#undef VARYING_FORMAT + + assert(ncomps > 0 && ncomps <= ARRAY_SIZE(conv[0].formats)); + + for (unsigned i = 0; i < ARRAY_SIZE(conv); i++) { + if (conv[i].type == t) + return conv[i].formats[ncomps - 1]; + } + + unreachable("Invalid type"); +} + +struct slot_info { + nir_alu_type type; + unsigned count; + unsigned index; +}; + +static bool +walk_varyings(UNUSED nir_builder *b, nir_instr *instr, void *data) +{ + struct slot_info *slots = data; + + if (instr->type != nir_instr_type_intrinsic) + return false; + + nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr); + unsigned count; + + /* Only consider intrinsics that access varyings */ + switch (intr->intrinsic) { + case nir_intrinsic_store_output: + if (b->shader->info.stage != MESA_SHADER_VERTEX) + return false; + + count = nir_src_num_components(intr->src[0]); + break; + + case nir_intrinsic_load_input: + case nir_intrinsic_load_interpolated_input: + if (b->shader->info.stage != MESA_SHADER_FRAGMENT) + return false; + + count = nir_dest_num_components(intr->dest); + break; + + default: + return false; + } + + nir_io_semantics sem = nir_intrinsic_io_semantics(intr); + + if (sem.no_varying) + return false; + + /* In a fragment shader, 
flat shading is lowered to load_input but + * interpolation is lowered to load_interpolated_input, so we can check + * the intrinsic to distinguish. + * + * In a vertex shader, we consider everything flat, as the information + * will not contribute to the final linked varyings -- flatness is used + * only to determine the type, and the GL linker uses the type from the + * fragment shader instead. + */ + bool flat = (intr->intrinsic != nir_intrinsic_load_interpolated_input); + nir_alu_type type = flat ? nir_type_uint : nir_type_float; + + /* Demote interpolated float varyings to fp16 where possible. We do not + * demote flat varyings, including integer varyings, due to various + * issues with the Midgard hardware behaviour and TGSI shaders, as well + * as having no demonstrable benefit in practice. + */ + if (type == nir_type_float && sem.medium_precision) + type |= 16; + else + type |= 32; + + /* Count currently contains the number of components accessed by this + * intrinsic. However, we may be accessing a fractional location, + * indicated by the NIR component. Add that in. The final value is the + * maximum (component + count), an upper bound on the number of + * components possibly used. 
+ */ + count += nir_intrinsic_component(intr); + + /* Consider each slot separately */ + for (unsigned offset = 0; offset < sem.num_slots; ++offset) { + unsigned location = sem.location + offset; + unsigned index = nir_intrinsic_base(intr) + offset; + + if (slots[location].type) { + assert(slots[location].type == type); + assert(slots[location].index == index); + } else { + slots[location].type = type; + slots[location].index = index; + } + + slots[location].count = MAX2(slots[location].count, count); + } + + return false; +} + +void +pan_nir_collect_varyings(nir_shader *s, struct pan_shader_info *info) +{ + if (s->info.stage != MESA_SHADER_VERTEX && + s->info.stage != MESA_SHADER_FRAGMENT) + return; + + struct slot_info slots[64] = { 0 }; + nir_shader_instructions_pass(s, walk_varyings, nir_metadata_all, slots); + + struct pan_shader_varying *varyings = + (s->info.stage == MESA_SHADER_VERTEX) ? + info->varyings.output : + info->varyings.input; + + unsigned count = 0; + + for (unsigned i = 0; i < ARRAY_SIZE(slots); ++i) { + if (!slots[i].type) + continue; + + enum pipe_format format = + varying_format(slots[i].type, slots[i].count); + assert(format != PIPE_FORMAT_NONE); + + unsigned index = slots[i].index; + count = MAX2(count, index + 1); + + varyings[index].location = i; + varyings[index].format = format; + } + + if (s->info.stage == MESA_SHADER_VERTEX) + info->varyings.output_count = count; + else + info->varyings.input_count = count; +} diff --git a/src/panfrost/util/pan_ir.h b/src/panfrost/util/pan_ir.h index 6af25a8b6d5..20915a1a6db 100644 --- a/src/panfrost/util/pan_ir.h +++ b/src/panfrost/util/pan_ir.h @@ -510,6 +510,8 @@ bool pan_lower_helper_invocation(nir_shader *shader); bool pan_lower_sample_pos(nir_shader *shader); bool pan_lower_xfb(nir_shader *nir); +void pan_nir_collect_varyings(nir_shader *s, struct pan_shader_info *info); + /* * Helper returning the subgroup size. Generally, this is equal to the number of * threads in a warp. 
For Midgard (including warping models), this returns 1, as