diff --git a/src/panfrost/Makefile.sources b/src/panfrost/Makefile.sources
index f0fdc59f632..080a91fa6de 100644
--- a/src/panfrost/Makefile.sources
+++ b/src/panfrost/Makefile.sources
@@ -4,6 +4,7 @@ bifrost_FILES := \
         bifrost/bifrost_compile.h \
         bifrost/bi_layout.c \
         bifrost/bi_liveness.c \
+        bifrost/bi_lower_divergent_indirects.c \
         bifrost/bi_lower_swizzle.c \
         bifrost/bi_schedule.c \
         bifrost/bi_scoreboard.c \
diff --git a/src/panfrost/bifrost/bi_lower_divergent_indirects.c b/src/panfrost/bifrost/bi_lower_divergent_indirects.c
new file mode 100644
index 00000000000..1b52040608d
--- /dev/null
+++ b/src/panfrost/bifrost/bi_lower_divergent_indirects.c
@@ -0,0 +1,128 @@
+/*
+ * Copyright (C) 2021 Collabora, Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "compiler.h"
+#include "compiler/nir/nir_builder.h"
+
+/* Divergent attribute access is undefined behaviour. To avoid divergence,
+ * lower to an if-chain like:
+ *
+ *    value = 0;
+ *    if (lane == 0)
+ *       value = ld()
+ *    else if (lane == 1)
+ *       value = ld()
+ *    ...
+ *    else if (lane == MAX_LANE)
+ *       value = ld()
+ */
+
+static bool
+bi_lower_divergent_indirects_impl(nir_builder *b, nir_instr *instr, void *data)
+{
+        if (instr->type != nir_instr_type_intrinsic)
+                return false;
+
+        nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
+        gl_shader_stage stage = b->shader->info.stage;
+        nir_src *offset;
+
+        /* Not all indirect access needs this workaround */
+        switch (intr->intrinsic) {
+        case nir_intrinsic_load_input:
+        case nir_intrinsic_load_interpolated_input:
+                /* Attributes and varyings */
+                offset = nir_get_io_offset_src(intr);
+                break;
+
+        case nir_intrinsic_store_output:
+                /* Varyings only */
+                if (stage == MESA_SHADER_FRAGMENT)
+                        return false;
+
+                offset = nir_get_io_offset_src(intr);
+                break;
+
+        case nir_intrinsic_image_atomic_add:
+        case nir_intrinsic_image_atomic_imin:
+        case nir_intrinsic_image_atomic_umin:
+        case nir_intrinsic_image_atomic_imax:
+        case nir_intrinsic_image_atomic_umax:
+        case nir_intrinsic_image_atomic_and:
+        case nir_intrinsic_image_atomic_or:
+        case nir_intrinsic_image_atomic_xor:
+        case nir_intrinsic_image_load:
+        case nir_intrinsic_image_store:
+                /* Any image access */
+                offset = &intr->src[0];
+                break;
+        default:
+                return false;
+        }
+
+        if (!nir_src_is_divergent(*offset))
+                return false;
+
+        /* This indirect does need it */
+
+        b->cursor = nir_before_instr(instr);
+        nir_ssa_def *lane = nir_load_subgroup_invocation(b);
+        unsigned *lanes = data;
+
+        /* Write zero in a funny way to bypass lower_load_const_to_scalar */
+        bool has_dest = nir_intrinsic_infos[intr->intrinsic].has_dest;
+        unsigned size = has_dest ? nir_dest_bit_size(intr->dest) : 32;
+        nir_ssa_def *zero = has_dest ? nir_imm_zero(b, 1, size) : NULL;
+        nir_ssa_def *zeroes[4] = { zero, zero, zero, zero };
+        nir_ssa_def *res = has_dest ?
+                nir_vec(b, zeroes, nir_dest_num_components(intr->dest)) : NULL;
+
+        for (unsigned i = 0; i < (*lanes); ++i) {
+                nir_push_if(b, nir_ieq_imm(b, lane, i));
+
+                nir_instr *c = nir_instr_clone(b->shader, instr);
+                nir_intrinsic_instr *c_intr = nir_instr_as_intrinsic(c);
+                nir_builder_instr_insert(b, c);
+                nir_pop_if(b, NULL);
+
+                if (has_dest) {
+                        assert(c_intr->dest.is_ssa);
+                        nir_ssa_def *c_ssa = &c_intr->dest.ssa;
+                        res = nir_if_phi(b, c_ssa, res);
+                }
+        }
+
+        if (has_dest)
+                nir_ssa_def_rewrite_uses(&intr->dest.ssa, res);
+
+        nir_instr_remove(instr);
+        return true;
+}
+
+bool
+bi_lower_divergent_indirects(nir_shader *shader, unsigned lanes)
+{
+        return nir_shader_instructions_pass(shader,
+                        bi_lower_divergent_indirects_impl,
+                        nir_metadata_none, &lanes);
+}
diff --git a/src/panfrost/bifrost/bifrost_compile.c b/src/panfrost/bifrost/bifrost_compile.c
index 5d6861ff1ad..6291549ea18 100644
--- a/src/panfrost/bifrost/bifrost_compile.c
+++ b/src/panfrost/bifrost/bifrost_compile.c
@@ -2907,8 +2907,27 @@ bi_vectorize_filter(const nir_instr *instr, void *data)
         }
 }
 
+/* XXX: This is a kludge to work around NIR's lack of divergence metadata. If we
+ * keep divergence info around after we consume it for indirect lowering,
+ * nir_convert_from_ssa will regress code quality since it will avoid
+ * coalescing divergent with non-divergent nodes.
+ */
+
+static bool
+nir_invalidate_divergence_ssa(nir_ssa_def *ssa, UNUSED void *data)
+{
+        ssa->divergent = false;
+        return true;
+}
+
+static bool
+nir_invalidate_divergence(struct nir_builder *b, nir_instr *instr,
+                          UNUSED void *data)
+{
+        return nir_foreach_ssa_def(instr, nir_invalidate_divergence_ssa, NULL);
+}
+
 static void
-bi_optimize_nir(nir_shader *nir, bool is_blend)
+bi_optimize_nir(nir_shader *nir, unsigned gpu_id, bool is_blend)
 {
         bool progress;
         unsigned lower_flrp = 16 | 32 | 64;
@@ -3014,6 +3033,24 @@ bi_optimize_nir(nir_shader *nir, bool is_blend)
                 NIR_PASS_V(nir, nir_opt_move, move_all);
         }
 
+        /* We might lower attribute, varying, and image indirects. Use the
+         * gathered info to skip the extra analysis in the happy path. */
+        bool any_indirects =
+                nir->info.inputs_read_indirectly ||
+                nir->info.outputs_accessed_indirectly ||
+                nir->info.patch_inputs_read_indirectly ||
+                nir->info.patch_outputs_accessed_indirectly ||
+                nir->info.images_used;
+
+        if (any_indirects) {
+                nir_convert_to_lcssa(nir, true, true);
+                NIR_PASS_V(nir, nir_divergence_analysis);
+                NIR_PASS_V(nir, bi_lower_divergent_indirects,
+                           bifrost_lanes_per_warp(gpu_id));
+                NIR_PASS_V(nir, nir_shader_instructions_pass,
+                           nir_invalidate_divergence, nir_metadata_all, NULL);
+        }
+
         /* Take us out of SSA */
         NIR_PASS(progress, nir, nir_lower_locals_to_regs);
         NIR_PASS(progress, nir, nir_move_vec_src_uses_to_dest);
@@ -3172,7 +3209,7 @@ bifrost_compile_shader_nir(nir_shader *nir,
                         NULL);
         }
 
-        bi_optimize_nir(nir, ctx->inputs->is_blend);
+        bi_optimize_nir(nir, ctx->inputs->gpu_id, ctx->inputs->is_blend);
 
         NIR_PASS_V(nir, pan_nir_reorder_writeout);
 
diff --git a/src/panfrost/bifrost/compiler.h b/src/panfrost/bifrost/compiler.h
index 1c1ee1ab2b0..e8bbcfc76ea 100644
--- a/src/panfrost/bifrost/compiler.h
+++ b/src/panfrost/bifrost/compiler.h
@@ -988,4 +988,9 @@ bi_builder_insert(bi_cursor *cursor, bi_instr *I)
         unreachable("Invalid cursor option");
 }
 
+
+/* NIR passes */
+
+bool bi_lower_divergent_indirects(nir_shader *shader, unsigned lanes);
+
 #endif
diff --git a/src/panfrost/bifrost/meson.build b/src/panfrost/bifrost/meson.build
index 018189b8453..21f672e4198 100644
--- a/src/panfrost/bifrost/meson.build
+++ b/src/panfrost/bifrost/meson.build
@@ -22,6 +22,7 @@ libpanfrost_bifrost_files = files(
   'bi_layout.c',
   'bi_liveness.c',
+  'bi_lower_divergent_indirects.c',
   'bi_lower_swizzle.c',
   'bi_print.c',
   'bi_opt_constant_fold.c',
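
Illustrative sketch (not part of the patch): a minimal scalar model of what bi_lower_divergent_indirects does to one divergent load, assuming a hypothetical WARP_SIZE in place of bifrost_lanes_per_warp(gpu_id) and a plain array standing in for the indirectly indexed attribute, varying, or image. Only one lane satisfies lane == i in each arm, so the access that actually executes is no longer divergent, and the per-arm results are folded into a zero default the same way the pass chains nir_if_phi over the splatted nir_imm_zero.

#include <assert.h>
#include <stdint.h>

#define WARP_SIZE 8 /* placeholder for bifrost_lanes_per_warp(gpu_id) */

/* Scalar model of the generated if-ladder: `lane` models
 * nir_load_subgroup_invocation(), `index` is the divergent offset, and
 * `resource` stands in for the indirectly indexed attribute/varying/image. */
static uint32_t
lowered_indirect_load(uint32_t lane, const uint32_t *resource, uint32_t index)
{
        uint32_t value = 0; /* the zero default merged by the phi chain */

        for (uint32_t i = 0; i < WARP_SIZE; ++i) {
                if (lane == i)
                        value = resource[index]; /* only lane i reaches this */
        }

        return value;
}

int
main(void)
{
        const uint32_t resource[WARP_SIZE] = { 10, 11, 12, 13, 14, 15, 16, 17 };

        /* Each lane still observes exactly the value it would have loaded
         * directly, so the lowering is behaviour-preserving per lane. */
        for (uint32_t lane = 0; lane < WARP_SIZE; ++lane) {
                uint32_t divergent_index = (lane * 3) % WARP_SIZE;
                assert(lowered_indirect_load(lane, resource, divergent_index) ==
                       resource[divergent_index]);
        }

        return 0;
}

In the real pass the ladder is built with nir_push_if/nir_ieq_imm/nir_pop_if and the original intrinsic is cloned into each arm with nir_instr_clone; the zero default and the nir_if_phi merge only exist when the intrinsic has a destination, so stores and image stores go through the same ladder without merging results.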