From e6d208b1f9dd39726abb09df9742ea7d2ef26bd5 Mon Sep 17 00:00:00 2001
From: Georg Lehmann
Date: Tue, 24 Jun 2025 11:26:06 +0200
Subject: [PATCH] nir/opt_shrink_vectors: also split vecs into distinct smaller vecs if possible
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Foz-DB Navi48:
Totals from 17 (0.02% of 80265) affected shaders:
Instrs: 75085 -> 74912 (-0.23%); split: -0.23%, +0.00%
CodeSize: 428968 -> 427028 (-0.45%); split: -0.45%, +0.00%
Latency: 1306841 -> 1306080 (-0.06%); split: -0.06%, +0.00%
InvThroughput: 598998 -> 598719 (-0.05%)
Copies: 15733 -> 15561 (-1.09%)
Branches: 2435 -> 2422 (-0.53%)
PreVGPRs: 1723 -> 1721 (-0.12%)
VALU: 43019 -> 42847 (-0.40%)

Reviewed-by: Daniel Schürmann
Part-of:
---
 src/compiler/nir/nir_opt_shrink_vectors.c | 69 ++++++++++++++++++-----
 1 file changed, 56 insertions(+), 13 deletions(-)

diff --git a/src/compiler/nir/nir_opt_shrink_vectors.c b/src/compiler/nir/nir_opt_shrink_vectors.c
index 71101892c9c..6fb92600dd7 100644
--- a/src/compiler/nir/nir_opt_shrink_vectors.c
+++ b/src/compiler/nir/nir_opt_shrink_vectors.c
@@ -181,19 +181,13 @@ shrink_intrinsic_to_non_sparse(nir_intrinsic_instr *instr)
 }
 
 static bool
-opt_shrink_vector(nir_builder *b, nir_alu_instr *instr)
+create_smaller_vec(nir_builder *b, nir_alu_instr *vec, nir_component_mask_t mask)
 {
-   nir_def *def = &instr->def;
-   unsigned mask = nir_def_components_read(def);
-
-   /* If nothing was read, leave it up to DCE. */
-   if (mask == 0)
-      return false;
-
-   /* don't remove any channels if used by non-ALU */
-   if (!is_only_used_by_alu(def))
+   /* Masks with zero or one component set: leave these for copy propagation. */
+   if (util_is_power_of_two_or_zero(mask))
       return false;
 
+   nir_def *def = &vec->def;
    uint8_t reswizzle[NIR_MAX_VEC_COMPONENTS] = { 0 };
    nir_scalar srcs[NIR_MAX_VEC_COMPONENTS] = { 0 };
    unsigned num_components = 0;
@@ -201,7 +195,7 @@ opt_shrink_vector(nir_builder *b, nir_alu_instr *instr)
       if (!((mask >> i) & 0x1))
          continue;
 
-      nir_scalar scalar = nir_get_scalar(instr->src[i].src.ssa, instr->src[i].swizzle[0]);
+      nir_scalar scalar = nir_scalar_resolved(def, i);
 
       /* Try reuse a component with the same value */
       unsigned j;
@@ -229,12 +223,61 @@ opt_shrink_vector(nir_builder *b, nir_alu_instr *instr)
 
    /* create new vecN and replace uses */
    nir_def *new_vec = nir_vec_scalars(b, srcs, num_components);
-   nir_def_rewrite_uses(def, new_vec);
+
+   nir_foreach_use_safe(src, def) {
+      if (nir_src_components_read(src) & mask)
+         nir_src_rewrite(src, new_vec);
+   }
    reswizzle_alu_uses(new_vec, reswizzle);
 
    return true;
 }
 
+static bool
+opt_shrink_or_split_vector(nir_builder *b, nir_alu_instr *vec)
+{
+   /* Try to split vec into distinct smaller vecs, one per group of overlapping use masks. */
+   nir_component_mask_t use_masks[NIR_MAX_VEC_COMPONENTS] = { 0 };
+   unsigned use_mask_count = 0;
+
+   nir_foreach_use_including_if(src, &vec->def) {
+      /* Any if-condition or non-ALU use: don't remove any channels. */
+      if (nir_src_is_if(src) || nir_src_parent_instr(src)->type != nir_instr_type_alu)
+         return false;
+
+      nir_component_mask_t read = nir_src_components_read(src);
+      bool mask_found = false;
+      for (unsigned i = 0; i < use_mask_count; i++) {
+         if (!(use_masks[i] & read))
+            continue;
+
+         use_masks[i] |= read;
+
+         /* Fold later masks that now overlap use_masks[i] into it, compacting the rest. */
+         unsigned k = i + 1;
+         for (unsigned j = i + 1; j < use_mask_count; j++) {
+            if (use_masks[i] & use_masks[j])
+               use_masks[i] |= use_masks[j];
+            else
+               use_masks[k++] = use_masks[j];
+         }
+         use_mask_count = k;
+
+         mask_found = true;
+         break;
+      }
+
+      if (!mask_found)
+         use_masks[use_mask_count++] = read;
+   }
+
+   bool progress = false;
+   for (unsigned i = 0; i < use_mask_count; i++)
+      progress |= create_smaller_vec(b, vec, use_masks[i]);
+
+   return progress;
+}
+
 static bool
 opt_shrink_vectors_alu(nir_builder *b, nir_alu_instr *instr)
 {
@@ -245,7 +288,7 @@ opt_shrink_vectors_alu(nir_builder *b, nir_alu_instr *instr)
       return false;
 
    if (nir_op_is_vec(instr->op))
-      return opt_shrink_vector(b, instr);
+      return opt_shrink_or_split_vector(b, instr);
 
    if (nir_op_infos[instr->op].output_size != 0)
       return false;