From 2f7fdcef1f08118531066b1aec4601756e75f9f1 Mon Sep 17 00:00:00 2001 From: Job Noorman Date: Thu, 3 Jul 2025 06:27:09 +0200 Subject: [PATCH] nir/opt_uniform_subgroup: use ballot_bit_count Using bit_count on the result of ballot doesn't work for targets where ballot's num_components > 1. Signed-off-by: Job Noorman Reviewed-by: Emma Anholt Fixes: d2e1e4442aa ("ir3: enable nir_opt_uniform_subgroup") Part-of: (cherry picked from commit ae66bd1c007af48f609147979cef550d04be05ed) --- .pick_status.json | 2 +- src/compiler/nir/nir_opt_uniform_subgroup.c | 25 +++++++++++++------ .../ci/freedreno-a750-vkd3d-fails.txt | 3 --- 3 files changed, 19 insertions(+), 11 deletions(-) diff --git a/.pick_status.json b/.pick_status.json index 1e0bdc438b2..7a52577b713 100644 --- a/.pick_status.json +++ b/.pick_status.json @@ -614,7 +614,7 @@ "description": "nir/opt_uniform_subgroup: use ballot_bit_count", "nominated": true, "nomination_type": 2, - "resolution": 0, + "resolution": 1, "main_sha": null, "because_sha": "d2e1e4442aa1a1121fae44666efae374bd94190a", "notes": null diff --git a/src/compiler/nir/nir_opt_uniform_subgroup.c b/src/compiler/nir/nir_opt_uniform_subgroup.c index df269856943..429dfb8295a 100644 --- a/src/compiler/nir/nir_opt_uniform_subgroup.c +++ b/src/compiler/nir/nir_opt_uniform_subgroup.c @@ -69,9 +69,18 @@ opt_uniform_subgroup_filter(const nir_instr *instr, const void *_state) } } +static nir_def * +ballot_bit_count(nir_builder *b, nir_def *ballot) +{ + return ballot->num_components == 1 + ? nir_bit_count(b, ballot) + : nir_ballot_bit_count_reduce(b, ballot->bit_size, ballot); +} + static nir_def * count_active_invocations(nir_builder *b, nir_def *value, bool inclusive, - bool has_mbcnt_amd) + bool has_mbcnt_amd, + const nir_lower_subgroups_options *options) { /* For the non-inclusive case, the two paths are functionally the same. * For the inclusive case, the are similar but very subtly different. @@ -91,11 +100,13 @@ count_active_invocations(nir_builder *b, nir_def *value, bool inclusive, if (has_mbcnt_amd) { return nir_mbcnt_amd(b, value, nir_imm_int(b, (int)inclusive)); } else { - nir_def *mask = inclusive - ? nir_load_subgroup_le_mask(b, 1, 32) - : nir_load_subgroup_lt_mask(b, 1, 32); + nir_def *mask = + inclusive ? nir_load_subgroup_le_mask(b, options->ballot_components, + options->ballot_bit_size) + : nir_load_subgroup_lt_mask(b, options->ballot_components, + options->ballot_bit_size); - return nir_bit_count(b, nir_iand(b, value, mask)); + return ballot_bit_count(b, nir_iand(b, value, mask)); } } @@ -119,11 +130,11 @@ opt_uniform_subgroup_instr(nir_builder *b, nir_instr *instr, void *_state) options->ballot_bit_size, nir_imm_true(b)); if (intrin->intrinsic == nir_intrinsic_reduce) { - count = nir_bit_count(b, ballot); + count = ballot_bit_count(b, ballot); } else { count = count_active_invocations(b, ballot, intrin->intrinsic == nir_intrinsic_inclusive_scan, - false); + false, options); } const unsigned bit_size = intrin->src[0].ssa->bit_size; diff --git a/src/freedreno/ci/freedreno-a750-vkd3d-fails.txt b/src/freedreno/ci/freedreno-a750-vkd3d-fails.txt index 8d0800264a2..e206d4d39c4 100644 --- a/src/freedreno/ci/freedreno-a750-vkd3d-fails.txt +++ b/src/freedreno/ci/freedreno-a750-vkd3d-fails.txt @@ -1,5 +1,2 @@ test_shader_instructions,Fail test_line_rasterization,Fail - -# error: src->ssa->num_components == num_components (../src/compiler/nir/nir_validate.c:205) -test_shader_waveop_maximal_convergence,Crash