From d0281fc16a8b534f2cc54d55d84c30b2e7af2d8d Mon Sep 17 00:00:00 2001 From: Alyssa Rosenzweig Date: Sat, 15 Oct 2022 20:09:25 -0400 Subject: [PATCH] pan/mdg: Use bifrost_nir_lower_store_component Move the pass from the Bifrost compiler to the Midgard/Bifrost common code directory, and take advantage of it on Midgard, where it fixes the same tests as it fixed originally on Bifrost. Signed-off-by: Alyssa Rosenzweig Part-of: --- src/panfrost/bifrost/bifrost_compile.c | 69 +----------- src/panfrost/ci/panfrost-t860-fails.txt | 5 - src/panfrost/midgard/midgard_compile.c | 10 ++ src/panfrost/util/meson.build | 1 + src/panfrost/util/pan_ir.h | 1 + src/panfrost/util/pan_lower_store_component.c | 105 ++++++++++++++++++ 6 files changed, 118 insertions(+), 73 deletions(-) create mode 100644 src/panfrost/util/pan_lower_store_component.c diff --git a/src/panfrost/bifrost/bifrost_compile.c b/src/panfrost/bifrost/bifrost_compile.c index 5afb34e0e78..d63f45b5afa 100644 --- a/src/panfrost/bifrost/bifrost_compile.c +++ b/src/panfrost/bifrost/bifrost_compile.c @@ -4686,68 +4686,6 @@ bi_opt_post_ra(bi_context *ctx) } } -/* If the shader packs multiple varyings into the same location with different - * location_frac, we'll need to lower to a single varying store that collects - * all of the channels together. - */ -static bool -bifrost_nir_lower_store_component(struct nir_builder *b, - nir_instr *instr, void *data) -{ - if (instr->type != nir_instr_type_intrinsic) - return false; - - nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr); - - if (intr->intrinsic != nir_intrinsic_store_output) - return false; - - struct hash_table_u64 *slots = data; - unsigned component = nir_intrinsic_component(intr); - nir_src *slot_src = nir_get_io_offset_src(intr); - uint64_t slot = nir_src_as_uint(*slot_src) + nir_intrinsic_base(intr); - - nir_intrinsic_instr *prev = _mesa_hash_table_u64_search(slots, slot); - unsigned mask = (prev ? nir_intrinsic_write_mask(prev) : 0); - - nir_ssa_def *value = intr->src[0].ssa; - b->cursor = nir_before_instr(&intr->instr); - - nir_ssa_def *undef = nir_ssa_undef(b, 1, value->bit_size); - nir_ssa_def *channels[4] = { undef, undef, undef, undef }; - - /* Copy old */ - u_foreach_bit(i, mask) { - assert(prev != NULL); - nir_ssa_def *prev_ssa = prev->src[0].ssa; - channels[i] = nir_channel(b, prev_ssa, i); - } - - /* Copy new */ - unsigned new_mask = nir_intrinsic_write_mask(intr); - mask |= (new_mask << component); - - u_foreach_bit(i, new_mask) { - assert(component + i < 4); - channels[component + i] = nir_channel(b, value, i); - } - - intr->num_components = util_last_bit(mask); - nir_instr_rewrite_src_ssa(instr, &intr->src[0], - nir_vec(b, channels, intr->num_components)); - - nir_intrinsic_set_component(intr, 0); - nir_intrinsic_set_write_mask(intr, mask); - - if (prev) { - _mesa_hash_table_u64_remove(slots, slot); - nir_instr_remove(&prev->instr); - } - - _mesa_hash_table_u64_insert(slots, slot, intr); - return false; -} - /* Dead code elimination for branches at the end of a block - only one branch * per block is legal semantically, but unreachable jumps can be generated. * Likewise on Bifrost we can generate jumps to the terminal block which need @@ -4934,12 +4872,7 @@ bi_finalize_nir(nir_shader *nir, unsigned gpu_id, bool is_blend) BITFIELD64_BIT(VARYING_SLOT_PSIZ), false); } - struct hash_table_u64 *stores = _mesa_hash_table_u64_create(NULL); - NIR_PASS_V(nir, nir_shader_instructions_pass, - bifrost_nir_lower_store_component, - nir_metadata_block_index | - nir_metadata_dominance, stores); - _mesa_hash_table_u64_destroy(stores); + NIR_PASS_V(nir, pan_nir_lower_store_component); } NIR_PASS_V(nir, nir_lower_ssbo); diff --git a/src/panfrost/ci/panfrost-t860-fails.txt b/src/panfrost/ci/panfrost-t860-fails.txt index 87ff8a9709b..8190d962540 100644 --- a/src/panfrost/ci/panfrost-t860-fails.txt +++ b/src/panfrost/ci/panfrost-t860-fails.txt @@ -40,11 +40,6 @@ dEQP-GLES3.functional.fbo.blit.rect.nearest_consistency_min_reverse_src_dst_x,Fa dEQP-GLES3.functional.fbo.blit.rect.nearest_consistency_min_reverse_src_dst_y,Fail dEQP-GLES3.functional.fbo.blit.rect.nearest_consistency_min_reverse_src_x,Fail dEQP-GLES3.functional.fbo.blit.rect.nearest_consistency_min_reverse_src_y,Fail -dEQP-GLES31.functional.separate_shader.random.35,Fail -dEQP-GLES31.functional.separate_shader.random.68,Fail -dEQP-GLES31.functional.separate_shader.random.79,Fail -dEQP-GLES31.functional.separate_shader.random.80,Fail -dEQP-GLES31.functional.separate_shader.random.89,Fail dEQP-GLES31.functional.texture.gather.basic.cube.depth32f.no_corners.size_pot.compare_greater.clamp_to_edge_repeat,Fail dEQP-GLES31.functional.texture.gather.basic.cube.depth32f.no_corners.size_pot.compare_greater.mirrored_repeat_clamp_to_edge,Fail dEQP-GLES31.functional.texture.gather.basic.cube.depth32f.no_corners.size_pot.compare_greater.repeat_mirrored_repeat,Fail diff --git a/src/panfrost/midgard/midgard_compile.c b/src/panfrost/midgard/midgard_compile.c index 08a46f77a8f..2f64d71a69d 100644 --- a/src/panfrost/midgard/midgard_compile.c +++ b/src/panfrost/midgard/midgard_compile.c @@ -3186,6 +3186,16 @@ midgard_compile_shader_nir(nir_shader *nir, NIR_PASS_V(nir, nir_lower_io, nir_var_shader_in | nir_var_shader_out, glsl_type_size, 0); + + if (ctx->stage == MESA_SHADER_VERTEX) { + /* nir_lower[_explicit]_io is lazy and emits mul+add chains even + * for offsets it could figure out are constant. Do some + * constant folding before pan_nir_lower_store_component below. + */ + NIR_PASS_V(nir, nir_opt_constant_folding); + NIR_PASS_V(nir, pan_nir_lower_store_component); + } + NIR_PASS_V(nir, nir_lower_ssbo); NIR_PASS_V(nir, pan_nir_lower_zs_store); diff --git a/src/panfrost/util/meson.build b/src/panfrost/util/meson.build index 34858723feb..3e13bb5f8ac 100644 --- a/src/panfrost/util/meson.build +++ b/src/panfrost/util/meson.build @@ -29,6 +29,7 @@ libpanfrost_util_files = files( 'pan_lower_framebuffer.c', 'pan_lower_helper_invocation.c', 'pan_lower_sample_position.c', + 'pan_lower_store_component.c', 'pan_lower_writeout.c', 'pan_lower_xfb.c', 'pan_lower_64bit_intrin.c', diff --git a/src/panfrost/util/pan_ir.h b/src/panfrost/util/pan_ir.h index 98b4decd117..6af25a8b6d5 100644 --- a/src/panfrost/util/pan_ir.h +++ b/src/panfrost/util/pan_ir.h @@ -502,6 +502,7 @@ bool pan_has_dest_mod(nir_dest **dest, nir_op op); #define PAN_WRITEOUT_2 8 bool pan_nir_lower_zs_store(nir_shader *nir); +bool pan_nir_lower_store_component(nir_shader *shader); bool pan_nir_lower_64bit_intrin(nir_shader *shader); diff --git a/src/panfrost/util/pan_lower_store_component.c b/src/panfrost/util/pan_lower_store_component.c new file mode 100644 index 00000000000..5178317232b --- /dev/null +++ b/src/panfrost/util/pan_lower_store_component.c @@ -0,0 +1,105 @@ +/* + * Copyright (C) 2020-2022 Collabora Ltd. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Authors (Collabora): + * Alyssa Rosenzweig + */ + +#include "pan_ir.h" +#include "compiler/nir/nir_builder.h" + +/* + * If the shader packs multiple varyings into the same location with different + * location_frac, we'll need to lower to a single varying store that collects + * all of the channels together. This is because the varying instruction on + * Midgard and Bifrost is slot-based, writing out an entire vec4 slot at a time. + */ +static bool +lower_store_component(nir_builder *b, nir_instr *instr, void *data) +{ + if (instr->type != nir_instr_type_intrinsic) + return false; + + nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr); + + if (intr->intrinsic != nir_intrinsic_store_output) + return false; + + struct hash_table_u64 *slots = data; + unsigned component = nir_intrinsic_component(intr); + nir_src *slot_src = nir_get_io_offset_src(intr); + uint64_t slot = nir_src_as_uint(*slot_src) + nir_intrinsic_base(intr); + + nir_intrinsic_instr *prev = _mesa_hash_table_u64_search(slots, slot); + unsigned mask = (prev ? nir_intrinsic_write_mask(prev) : 0); + + nir_ssa_def *value = intr->src[0].ssa; + b->cursor = nir_before_instr(&intr->instr); + + nir_ssa_def *undef = nir_ssa_undef(b, 1, value->bit_size); + nir_ssa_def *channels[4] = { undef, undef, undef, undef }; + + /* Copy old */ + u_foreach_bit(i, mask) { + assert(prev != NULL); + nir_ssa_def *prev_ssa = prev->src[0].ssa; + channels[i] = nir_channel(b, prev_ssa, i); + } + + /* Copy new */ + unsigned new_mask = nir_intrinsic_write_mask(intr); + mask |= (new_mask << component); + + u_foreach_bit(i, new_mask) { + assert(component + i < 4); + channels[component + i] = nir_channel(b, value, i); + } + + intr->num_components = util_last_bit(mask); + nir_instr_rewrite_src_ssa(instr, &intr->src[0], + nir_vec(b, channels, intr->num_components)); + + nir_intrinsic_set_component(intr, 0); + nir_intrinsic_set_write_mask(intr, mask); + + if (prev) { + _mesa_hash_table_u64_remove(slots, slot); + nir_instr_remove(&prev->instr); + } + + _mesa_hash_table_u64_insert(slots, slot, intr); + return false; +} + +bool +pan_nir_lower_store_component(nir_shader *s) +{ + assert(s->info.stage == MESA_SHADER_VERTEX); + + struct hash_table_u64 *stores = _mesa_hash_table_u64_create(NULL); + bool progress = nir_shader_instructions_pass(s, lower_store_component, + nir_metadata_block_index | + nir_metadata_dominance, + stores); + _mesa_hash_table_u64_destroy(stores); + return progress; +}