From fac4edbcba78e485c187762d55c632a86887f679 Mon Sep 17 00:00:00 2001 From: Georg Lehmann Date: Mon, 27 Apr 2026 09:14:00 +0200 Subject: [PATCH] nir/opt_varyings: back propagate signed zero information to outputs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Foz-DB Navi48: Totals from 809 (0.67% of 120695) affected shaders: MaxWaves: 21804 -> 21808 (+0.02%) Instrs: 863131 -> 861310 (-0.21%); split: -0.22%, +0.01% CodeSize: 4535500 -> 4523232 (-0.27%); split: -0.30%, +0.03% VGPRs: 47304 -> 47280 (-0.05%) SpillSGPRs: 170 -> 82 (-51.76%) Latency: 6791484 -> 6786880 (-0.07%); split: -0.07%, +0.00% InvThroughput: 906281 -> 905301 (-0.11%); split: -0.11%, +0.00% VClause: 16910 -> 16917 (+0.04%); split: -0.01%, +0.05% SClause: 21856 -> 21827 (-0.13%); split: -0.14%, +0.01% Copies: 61890 -> 61436 (-0.73%); split: -0.80%, +0.06% Branches: 19725 -> 19640 (-0.43%) PreSGPRs: 38011 -> 37851 (-0.42%) PreVGPRs: 36482 -> 36454 (-0.08%) VALU: 465316 -> 464323 (-0.21%); split: -0.22%, +0.00% SALU: 143757 -> 143395 (-0.25%); split: -0.33%, +0.08% VMEM: 36827 -> 36806 (-0.06%) SMEM: 37769 -> 37768 (-0.00%) Reviewed-by: Marek Olšák Part-of: --- src/compiler/nir/nir_opt_varyings.c | 87 ++++++++++++++++++- .../opt_varyings_tests_bicm_binary_alu.cpp | 2 +- .../tests/opt_varyings_tests_dead_output.cpp | 2 +- .../nir/tests/opt_varyings_tests_dedup.cpp | 2 +- .../tests/opt_varyings_tests_prop_const.cpp | 2 +- .../nir/tests/opt_varyings_tests_prop_ubo.cpp | 2 +- .../tests/opt_varyings_tests_prop_uniform.cpp | 2 +- .../opt_varyings_tests_prop_uniform_expr.cpp | 2 +- 8 files changed, 91 insertions(+), 10 deletions(-) diff --git a/src/compiler/nir/nir_opt_varyings.c b/src/compiler/nir/nir_opt_varyings.c index 58a44712d10..9fb943802f5 100644 --- a/src/compiler/nir/nir_opt_varyings.c +++ b/src/compiler/nir/nir_opt_varyings.c @@ -162,7 +162,13 @@ * * Eliminated output stores get the "no_varying" flag if they are also * xfb stores or write sysval outputs. 
* - * 5. Backward inter-shader code motion + * 5. Link signed zero information + * + * If no loads care about the sign of zero, and if there is no xfb or + * sign aware sysval usage, set the no_signed_zero flag for output stores. + * This can then be further back propagated by `nir_opt_fp_math_ctrl`. + * + * 6. Backward inter-shader code motion * * "Backward" refers to moving code in the opposite direction that shaders * are executed, i.e. moving code from the consumer to the producer. @@ -329,7 +335,7 @@ * the above case (which is temp0 and temp1 to replace all 3 inputs), let * us know. * - * 6. Forward inter-shader code motion + * 7. Forward inter-shader code motion * * TODO: Not implemented. The text below is a draft of the description. * @@ -365,7 +371,7 @@ * we don't increase the GPU overhead measurably by moving code across * pipeline stages that amplify GPU work. * - * 7. Compaction to vec4 slots (AKA packing) + * 8. Compaction to vec4 slots (AKA packing) * * First, varyings are divided into these groups, and components from each * group are assigned locations in this order (effectively forcing @@ -764,6 +770,11 @@ struct linkage_info { */ BITSET_DECLARE(convergent32_mask, NUM_SCALAR_SLOTS); BITSET_DECLARE(convergent16_mask, NUM_SCALAR_SLOTS); + + /* Mask of components that have an input load, xfb, or sysval usage that + * cares about the sign of zero. 
+ */ + BITSET_DECLARE(signed_zero_mask, NUM_SCALAR_SLOTS); }; /****************************************************************** @@ -1060,6 +1071,32 @@ is_active_sysval_output(struct linkage_info *linkage, unsigned slot, !nir_intrinsic_io_semantics(intr).no_sysval_output; } +static bool +is_sz_sysval(struct linkage_info *linkage, unsigned slot, + nir_intrinsic_instr *intr) +{ + if (!is_active_sysval_output(linkage, slot, intr)) + return false; + + switch (vec4_slot(slot)) { + case VARYING_SLOT_POS: + case VARYING_SLOT_CLIP_VERTEX: + case VARYING_SLOT_PSIZ: + case VARYING_SLOT_CLIP_DIST0: + case VARYING_SLOT_CLIP_DIST1: + case VARYING_SLOT_CULL_DIST0: + case VARYING_SLOT_CULL_DIST1: + return false; + case VARYING_SLOT_TESS_LEVEL_OUTER: + case VARYING_SLOT_TESS_LEVEL_INNER: + case VARYING_SLOT_BOUNDING_BOX0: + case VARYING_SLOT_BOUNDING_BOX1: + /* These enums are aliased with integer mesh outputs. */ + return linkage->producer_stage != MESA_SHADER_TESS_CTRL; + default: return true; /* Any other active sysval output is treated as caring about the sign of zero. */ + } +} /* is_sz_sysval: false when the store is not an active sysval output or targets the POS, CLIP_VERTEX, PSIZ, CLIP_DIST* or CULL_DIST* slots; for the TESS_LEVEL_* and BOUNDING_BOX* slots the answer depends on producer_stage; true otherwise. gather_outputs uses a true result to set signed_zero_mask for the stored slot(s). */ + /** * This function acts like a filter. The pass won't touch varyings that * return false here, and the return value is saved in the linkage bitmasks, @@ -1276,6 +1313,12 @@ gather_inputs(struct nir_builder *builder, nir_intrinsic_instr *intr, void *cb_d list_addtail(&node->head, &in->consumer.loads); in->num_slots = MAX2(in->num_slots, sem.num_slots); + if (!sem.no_signed_zero && intr->intrinsic != nir_intrinsic_load_interpolated_input) { + unsigned nsz_count = nir_src_is_const(offset) ? 1 : sem.num_slots; + for (unsigned i = 0; i < nsz_count; i++) + BITSET_SET(linkage->signed_zero_mask, slot + i * 8); + } + BITSET_SET(linkage->removable_mask, slot); enum fs_vec4_type fs_vec4_type = FS_VEC4_TYPE_NONE; @@ -1555,6 +1598,12 @@ gather_outputs(struct nir_builder *builder, nir_intrinsic_instr *intr, void *cb_ list_addtail(&node->head, &out->producer.loads); } + if (is_store ? 
(is_sz_sysval(linkage, slot, intr) || has_xfb(intr)) : !sem.no_signed_zero) { + unsigned nsz_count = nir_src_is_const(offset) ? 1 : sem.num_slots; + for (unsigned i = 0; i < nsz_count; i++) + BITSET_SET(linkage->signed_zero_mask, slot + i * 8); + } + BITSET_SET(linkage->removable_mask, slot); /* Indirect indexing. */ @@ -4181,6 +4230,35 @@ backward_inter_shader_code_motion(struct linkage_info *linkage, return false; } +/****************************************************************** + * SIGNED ZERO LINKING + ******************************************************************/ + /* Walk the producer stores of every scalar slot and make each store's no_signed_zero I/O-semantics flag true exactly when none of the slots checked for it is set in linkage->signed_zero_mask. ORs nir_progress_producer into *progress whenever a store's semantics are rewritten. */ +static void +link_no_signed_zero(struct linkage_info *linkage, + nir_opt_varyings_progress *progress) +{ + for (unsigned slot = 0; slot < NUM_SCALAR_SLOTS; slot++) { + struct scalar_slot *scalar_slot = &linkage->slot[slot]; + + list_for_each_entry(struct list_node, iter, &scalar_slot->producer.stores, head) { + nir_io_semantics sem = nir_intrinsic_io_semantics(iter->instr); + /* A constant I/O offset checks only this slot; otherwise all sem.num_slots slots are checked, 8 scalar slots apart. */ + bool no_signed_zero = true; + unsigned nsz_count = nir_src_is_const(*nir_get_io_offset_src(iter->instr)) ? 1 : sem.num_slots; + for (unsigned i = 0; i < nsz_count; i++) + no_signed_zero &= !BITSET_TEST(linkage->signed_zero_mask, slot + i * 8); + /* Rewrite the semantics and report progress only when the flag actually changes; this can set the flag as well as clear it. */ + if (sem.no_signed_zero != no_signed_zero) { + *progress |= nir_progress_producer; + sem.no_signed_zero = no_signed_zero; + nir_intrinsic_set_io_semantics(iter->instr, sem); + } + } + } +} + + /****************************************************************** * COMPACTION ******************************************************************/ @@ -5396,6 +5474,9 @@ nir_opt_varyings(nir_shader *producer, nir_shader *consumer, bool spirv, init_linkage(producer, consumer, spirv, max_uniform_components, max_ubos_per_stage, linkage, &progress); + + link_no_signed_zero(linkage, &progress); + /* This must be done after deduplication and before inter-shader code * motion. 
*/ diff --git a/src/compiler/nir/tests/opt_varyings_tests_bicm_binary_alu.cpp b/src/compiler/nir/tests/opt_varyings_tests_bicm_binary_alu.cpp index 4da6fac1b8e..35b7034b19e 100644 --- a/src/compiler/nir/tests/opt_varyings_tests_bicm_binary_alu.cpp +++ b/src/compiler/nir/tests/opt_varyings_tests_bicm_binary_alu.cpp @@ -82,7 +82,7 @@ TEST_F(nir_opt_varyings_test_bicm_binary_alu, \ ASSERT_TRUE(!shader_contains_def(b2, load[0])); \ ASSERT_TRUE(!shader_contains_def(b2, load[1])); \ } else { \ - ASSERT_EQ(opt_varyings(), 0); \ + ASSERT_EQ(opt_varyings() & nir_progress_consumer, 0); \ ASSERT_TRUE(!shader_contains_alu_op(b1, nir_op_##alu, bitsize)); \ ASSERT_TRUE(shader_contains_alu_op(b2, nir_op_##alu, bitsize)); \ ASSERT_TRUE(shader_contains_instr(b1, &store[0]->instr)); \ diff --git a/src/compiler/nir/tests/opt_varyings_tests_dead_output.cpp b/src/compiler/nir/tests/opt_varyings_tests_dead_output.cpp index 97ece917312..c55cc1465e5 100644 --- a/src/compiler/nir/tests/opt_varyings_tests_dead_output.cpp +++ b/src/compiler/nir/tests/opt_varyings_tests_dead_output.cpp @@ -34,7 +34,7 @@ TEST_F(nir_opt_varyings_test_dead_output, \ store_output(b1, VARYING_SLOT_##slot, 0, nir_type_float##bitsize, \ nir_imm_floatN_t(b1, 0, bitsize), 0); \ \ - ASSERT_TRUE(opt_varyings() == 0); \ + opt_varyings(); \ ASSERT_TRUE(b1->shader->info.outputs_written == VARYING_BIT_##slot); \ ASSERT_TRUE(shader_contains_instr(b1, &intr->instr)); \ ASSERT_TRUE(nir_intrinsic_io_semantics(intr).no_varying == \ diff --git a/src/compiler/nir/tests/opt_varyings_tests_dedup.cpp b/src/compiler/nir/tests/opt_varyings_tests_dedup.cpp index 66cb13a32f7..ebceb76f4b0 100644 --- a/src/compiler/nir/tests/opt_varyings_tests_dedup.cpp +++ b/src/compiler/nir/tests/opt_varyings_tests_dedup.cpp @@ -62,7 +62,7 @@ TEST_F(nir_opt_varyings_test_dedup, \ ASSERT_TRUE(!shader_contains_def(b2, load[1][v])); \ } \ } else { \ - ASSERT_EQ(opt_varyings(), 0); \ + ASSERT_EQ(opt_varyings() & nir_progress_consumer, 0); \ for (unsigned v = 
0; v < (is_per_vertex(b1, (gl_varying_slot)pslot[1], false) ? 3 : 1); v++) { \ ASSERT_TRUE(shader_contains_instr(b1, &store[0][v]->instr)); \ ASSERT_TRUE(shader_contains_instr(b1, &store[1][v]->instr)); \ diff --git a/src/compiler/nir/tests/opt_varyings_tests_prop_const.cpp b/src/compiler/nir/tests/opt_varyings_tests_prop_const.cpp index 6c20c15aa36..958ba352589 100644 --- a/src/compiler/nir/tests/opt_varyings_tests_prop_const.cpp +++ b/src/compiler/nir/tests/opt_varyings_tests_prop_const.cpp @@ -59,7 +59,7 @@ TEST_F(nir_opt_varyings_test_prop_const, \ SHADER_CONST_OUTPUT(producer_stage, consumer_stage, slot, comp, type, bitsize, value, value) \ \ if (nir_slot_is_sysval_output((gl_varying_slot)pindex, MESA_SHADER_##consumer_stage)) { \ - ASSERT_TRUE(opt_varyings() == nir_progress_consumer); \ + ASSERT_TRUE(opt_varyings() & nir_progress_consumer); \ ASSERT_TRUE(b1->shader->info.outputs_written == BITFIELD64_BIT(pindex)); \ ASSERT_TRUE(nir_intrinsic_io_semantics(store).no_varying); \ } else { \ diff --git a/src/compiler/nir/tests/opt_varyings_tests_prop_ubo.cpp b/src/compiler/nir/tests/opt_varyings_tests_prop_ubo.cpp index 7efd24920f6..ecd0c6e60be 100644 --- a/src/compiler/nir/tests/opt_varyings_tests_prop_ubo.cpp +++ b/src/compiler/nir/tests/opt_varyings_tests_prop_ubo.cpp @@ -59,7 +59,7 @@ TEST_F(nir_opt_varyings_test_prop_ubo, \ SHADER_UBO_OUTPUT(producer_stage, consumer_stage, slot, comp, type, bitsize, 1, 1) \ \ if (nir_slot_is_sysval_output((gl_varying_slot)pindex, MESA_SHADER_##consumer_stage)) { \ - ASSERT_TRUE(opt_varyings() == nir_progress_consumer); \ + ASSERT_TRUE(opt_varyings() & nir_progress_consumer); \ ASSERT_TRUE(b1->shader->info.outputs_written == BITFIELD64_BIT(pindex)); \ ASSERT_TRUE(nir_intrinsic_io_semantics(store).no_varying); \ } else { \ diff --git a/src/compiler/nir/tests/opt_varyings_tests_prop_uniform.cpp b/src/compiler/nir/tests/opt_varyings_tests_prop_uniform.cpp index 47bea129aa6..0382fc800c0 100644 --- 
a/src/compiler/nir/tests/opt_varyings_tests_prop_uniform.cpp +++ b/src/compiler/nir/tests/opt_varyings_tests_prop_uniform.cpp @@ -59,7 +59,7 @@ TEST_F(nir_opt_varyings_test_prop_uniform, \ SHADER_UNIFORM_OUTPUT(producer_stage, consumer_stage, slot, comp, type, bitsize, 1, 1) \ \ if (nir_slot_is_sysval_output((gl_varying_slot)pindex, MESA_SHADER_##consumer_stage)) { \ - ASSERT_TRUE(opt_varyings() == nir_progress_consumer); \ + ASSERT_TRUE(opt_varyings() & nir_progress_consumer); \ ASSERT_TRUE(b1->shader->info.outputs_written == BITFIELD64_BIT(pindex)); \ ASSERT_TRUE(nir_intrinsic_io_semantics(store).no_varying); \ } else { \ diff --git a/src/compiler/nir/tests/opt_varyings_tests_prop_uniform_expr.cpp b/src/compiler/nir/tests/opt_varyings_tests_prop_uniform_expr.cpp index a56a440a3cf..15e8d4208bf 100644 --- a/src/compiler/nir/tests/opt_varyings_tests_prop_uniform_expr.cpp +++ b/src/compiler/nir/tests/opt_varyings_tests_prop_uniform_expr.cpp @@ -59,7 +59,7 @@ TEST_F(nir_opt_varyings_test_prop_uniform_expr, \ SHADER_UNI_EXPR_OUTPUT(producer_stage, consumer_stage, slot, comp, type, bitsize, 1, 1) \ \ if (nir_slot_is_sysval_output((gl_varying_slot)pindex, MESA_SHADER_##consumer_stage)) { \ - ASSERT_TRUE(opt_varyings() == nir_progress_consumer); \ + ASSERT_TRUE(opt_varyings() & nir_progress_consumer); \ ASSERT_TRUE(b1->shader->info.outputs_written == BITFIELD64_BIT(pindex)); \ ASSERT_TRUE(nir_intrinsic_io_semantics(store).no_varying); \ } else { \