diff --git a/src/panfrost/bifrost/bi_opt_message_preload.c b/src/panfrost/bifrost/bi_opt_message_preload.c
new file mode 100644
index 00000000000..e19eeb4b0ea
--- /dev/null
+++ b/src/panfrost/bifrost/bi_opt_message_preload.c
@@ -0,0 +1,141 @@
+/*
+ * Copyright (C) 2021 Collabora, Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "compiler.h"
+#include "bi_builder.h"
+
+/* Bifrost v7 can preload up to two messages of the form:
+ *
+ * 1. +LD_VAR_IMM, register_format f32/f16, sample mode
+ * 2. +VAR_TEX, register format f32/f16, sample mode (TODO)
+ *
+ * Analyze the shader for these instructions and push accordingly: each match
+ * is recorded in ctx->info.bifrost->messages[] and replaced by moves from
+ * fixed registers, so the instruction itself is removed from the shader.
+ */
+
+static bool
+bi_is_regfmt_float(enum bi_register_format regfmt)
+{
+        return (regfmt == BI_REGISTER_FORMAT_F32) ||
+               (regfmt == BI_REGISTER_FORMAT_F16);
+}
+
+/* Preloaded varyings are interpolated at the sample location. Check if an
+ * instruction can use this mode: .sample reading r61, or .center. */
+static bool
+bi_can_interp_at_sample(bi_instr *I)
+{
+        /* .sample mode with r61 corresponds to per-sample interpolation */
+        if (I->sample == BI_SAMPLE_SAMPLE)
+                return bi_is_value_equiv(I->src[0], bi_register(61));
+
+        /* If the shader runs with pixel-frequency shading, .sample is
+         * equivalent to .center, so allow .center.
+         *
+         * If the shader runs with sample-frequency shading, .sample and .center
+         * are not equivalent. However, the ESSL 3.20 specification
+         * stipulates in section 4.5 ("Interpolation Qualifiers"):
+         *
+         *    for fragment shader input variables qualified with neither
+         *    centroid nor sample, the value of the assigned variable may be
+         *    interpolated anywhere within the pixel and a single value may be
+         *    assigned to each sample within the pixel, to the extent permitted
+         *    by the OpenGL ES Specification.
+         *
+         * We only produce .center for variables qualified with neither centroid
+         * nor sample, so if .center is specified this section applies. This
+         * suggests that, although per-pixel interpolation is allowed, it is not
+         * mandated ("may" rather than "must" or "should"). Therefore it appears
+         * safe to substitute .sample. */
+        return (I->sample == BI_SAMPLE_CENTER);
+}
+
+static bool
+bi_can_preload_ld_var(bi_instr *I)
+{
+        return (I->op == BI_OPCODE_LD_VAR_IMM) &&
+                bi_can_interp_at_sample(I) &&
+                bi_is_regfmt_float(I->register_format);
+}
+
+static bool
+bi_is_var_tex(enum bi_opcode op)
+{
+        return (op == BI_OPCODE_VAR_TEX_F32) || (op == BI_OPCODE_VAR_TEX_F16);
+}
+
+void
+bi_opt_message_preload(bi_context *ctx)
+{
+        unsigned nr_preload = 0;
+
+        /* We only preload from the first block. The builder inserts the
+         * replacement moves at the start of that block. */
+        bi_block *block = bi_start_block(&ctx->blocks);
+        bi_builder b = bi_init_builder(ctx, bi_before_nonempty_block(block));
+
+        bi_foreach_instr_in_block_safe(block, I) {
+                if (!bi_is_ssa(I->dest[0])) continue;
+
+                struct bifrost_message_preload msg;
+
+                if (bi_can_preload_ld_var(I)) {
+                        msg = (struct bifrost_message_preload) {
+                                .enabled = true,
+                                .varying_index = I->varying_index,
+                                .fp16 = (I->register_format == BI_REGISTER_FORMAT_F16),
+                                .num_components = I->vecsize + 1
+                        };
+                } else if (bi_is_var_tex(I->op)) {
+                        msg = (struct bifrost_message_preload) {
+                                .enabled = true,
+                                .texture = true,
+                                .varying_index = I->varying_index,
+                                .sampler_index = I->sampler_index,
+                                .fp16 = (I->op == BI_OPCODE_VAR_TEX_F16),
+                                .skip = I->skip,
+                                .zero_lod = I->lod_mode /* NOTE(review): enum used as flag, confirm */
+                        };
+                } else {
+                        continue;
+                }
+
+                /* Report the preloading in message slot nr_preload */
+                ctx->info.bifrost->messages[nr_preload] = msg;
+
+                /* Replace with moves at the start, reading message n from
+                 * registers 4n onwards (assumed preload layout). Ideally,
+                 * they will be coalesced out or copy propagated. */
+                for (unsigned i = 0; i < bi_count_write_registers(I, 0); ++i) {
+                        bi_mov_i32_to(&b, bi_word(I->dest[0], i),
+                                        bi_register((nr_preload * 4) + i));
+                }
+
+                bi_remove_instruction(I);
+
+                /* At most two messages can be preloaded; stop after the second */
+                if ((++nr_preload) == 2)
+                        break;
+        }
+}
diff --git a/src/panfrost/bifrost/bifrost.h b/src/panfrost/bifrost/bifrost.h
index c04b8a61ad4..6dce0c53b38 100644
--- a/src/panfrost/bifrost/bifrost.h
+++ b/src/panfrost/bifrost/bifrost.h
@@ -46,6 +46,7 @@ extern "C" {
 #define BIFROST_DBG_NOOPT 0x0100
 #define BIFROST_DBG_NOIDVS 0x0200
 #define BIFROST_DBG_NOSB 0x0400
+#define BIFROST_DBG_NOPRELOAD 0x0800
 
 extern int bifrost_debug;
 
diff --git a/src/panfrost/bifrost/bifrost_compile.c b/src/panfrost/bifrost/bifrost_compile.c
index ce51a5a40c6..0450e73cb34 100644
--- a/src/panfrost/bifrost/bifrost_compile.c
+++ b/src/panfrost/bifrost/bifrost_compile.c
@@ -48,6 +48,7 @@ static const struct debug_named_value bifrost_debug_options[] = {
         {"noopt", BIFROST_DBG_NOOPT, "Skip optimization passes"},
         {"noidvs", BIFROST_DBG_NOIDVS, "Disable IDVS"},
         {"nosb", BIFROST_DBG_NOSB, "Disable scoreboarding"},
+        {"nopreload", BIFROST_DBG_NOPRELOAD, "Disable message preloading"},
         DEBUG_NAMED_VALUE_END
 };
 
@@ -4012,6 +4013,16 @@ bi_compile_variant_nir(nir_shader *nir,
                 bi_opt_copy_prop(ctx);
                 bi_opt_mod_prop_forward(ctx);
                 bi_opt_mod_prop_backward(ctx);
+
+                /* Push LD_VAR_IMM/VAR_TEX instructions. Must run after
+                 * mod_prop_backward to fuse VAR_TEX */
+                if (ctx->arch == 7 && ctx->stage == MESA_SHADER_FRAGMENT &&
+                    !(bifrost_debug & BIFROST_DBG_NOPRELOAD)) {
+                        bi_opt_dead_code_eliminate(ctx);
+                        bi_opt_message_preload(ctx);
+                        bi_opt_copy_prop(ctx);
+                }
+
                 bi_opt_dead_code_eliminate(ctx);
                 bi_opt_cse(ctx);
                 bi_opt_dead_code_eliminate(ctx);
diff --git a/src/panfrost/bifrost/compiler.h b/src/panfrost/bifrost/compiler.h
index 664e25a3c7e..f71478e0cfd 100644
--- a/src/panfrost/bifrost/compiler.h
+++ b/src/panfrost/bifrost/compiler.h
@@ -986,6 +986,7 @@ void bi_opt_mod_prop_backward(bi_context *ctx);
 void bi_opt_dead_code_eliminate(bi_context *ctx);
 void bi_opt_fuse_dual_texture(bi_context *ctx);
 void bi_opt_dce_post_ra(bi_context *ctx);
+void bi_opt_message_preload(bi_context *ctx);
 void bi_opt_push_ubo(bi_context *ctx);
 void bi_opt_reorder_push(bi_context *ctx);
 void bi_lower_swizzle(bi_context *ctx);
diff --git a/src/panfrost/bifrost/meson.build b/src/panfrost/bifrost/meson.build
index 1dcd9b572da..eda61e8421a 100644
--- a/src/panfrost/bifrost/meson.build
+++ b/src/panfrost/bifrost/meson.build
@@ -33,6 +33,7 @@ libpanfrost_bifrost_files = files(
   'bi_opt_dce.c',
   'bi_opt_cse.c',
   'bi_opt_push_ubo.c',
+  'bi_opt_message_preload.c',
   'bi_opt_mod_props.c',
   'bi_opt_dual_tex.c',
   'bi_pack.c',