From 7de352e99e422c422d43a8f2ea5f9f7f2a2ae63c Mon Sep 17 00:00:00 2001 From: Georg Lehmann Date: Tue, 24 Jun 2025 22:31:39 +0200 Subject: [PATCH] nir,radv: add an option to not move 8/16bit vecs ACO will overestimate the register demand of the sources, so we don't want to create the vector later. Foz-DB Navi48: Totals from 240 (0.30% of 80265) affected shaders: MaxWaves: 6429 -> 6435 (+0.09%) Instrs: 3406069 -> 3406646 (+0.02%); split: -0.01%, +0.03% CodeSize: 18231596 -> 18233288 (+0.01%); split: -0.01%, +0.02% VGPRs: 14768 -> 14732 (-0.24%) Latency: 18981274 -> 18979170 (-0.01%); split: -0.02%, +0.01% InvThroughput: 4247331 -> 4246634 (-0.02%); split: -0.02%, +0.01% VClause: 85453 -> 85458 (+0.01%); split: -0.01%, +0.01% Copies: 262046 -> 261971 (-0.03%); split: -0.06%, +0.03% PreVGPRs: 10899 -> 10775 (-1.14%) VALU: 1923441 -> 1923485 (+0.00%); split: -0.01%, +0.01% SALU: 457983 -> 457982 (-0.00%) VOPD: 4980 -> 4861 (-2.39%); split: +0.48%, -2.87% Reviewed-by: Alyssa Rosenzweig Part-of: --- src/amd/vulkan/radv_pipeline.c | 6 +++--- src/compiler/nir/nir.h | 1 + src/compiler/nir/nir_opt_sink.c | 7 ++++++- 3 files changed, 10 insertions(+), 4 deletions(-) diff --git a/src/amd/vulkan/radv_pipeline.c b/src/amd/vulkan/radv_pipeline.c index 28c0dbd8928..4077aaed7fa 100644 --- a/src/amd/vulkan/radv_pipeline.c +++ b/src/amd/vulkan/radv_pipeline.c @@ -416,7 +416,7 @@ radv_postprocess_nir(struct radv_device *device, const struct radv_graphics_stat NIR_PASS(_, stage->nir, nir_lower_alu_width, opt_vectorize_callback, device); - nir_move_options sink_opts = nir_move_const_undef | nir_move_copies; + nir_move_options sink_opts = nir_move_const_undef | nir_move_copies | nir_dont_move_byte_word_vecs; if (!stage->key.optimisations_disabled) { NIR_PASS(_, stage->nir, nir_opt_licm); @@ -424,7 +424,7 @@ radv_postprocess_nir(struct radv_device *device, const struct radv_graphics_stat sink_opts |= nir_move_load_input; NIR_PASS(_, stage->nir, nir_opt_sink, sink_opts); - NIR_PASS(_, stage->nir, nir_opt_move, nir_move_load_input | nir_move_const_undef | nir_move_copies); + NIR_PASS(_, stage->nir, nir_opt_move, sink_opts | nir_move_load_input); } /* Lower VS inputs. We need to do this after nir_opt_sink, because @@ -625,7 +625,7 @@ radv_postprocess_nir(struct radv_device *device, const struct radv_graphics_stat NIR_PASS(_, stage->nir, nir_opt_sink, sink_opts); nir_move_options move_opts = nir_move_const_undef | nir_move_load_ubo | nir_move_load_input | - nir_move_comparisons | nir_move_copies | nir_move_alu; + nir_move_comparisons | nir_move_copies | nir_dont_move_byte_word_vecs | nir_move_alu; NIR_PASS(_, stage->nir, nir_opt_move, move_opts); /* Run nir_opt_move again to make sure that comparision are as close as possible to the first use to prevent SCC diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h index 04476ecc932..3947faa2ea2 100644 --- a/src/compiler/nir/nir.h +++ b/src/compiler/nir/nir.h @@ -6148,6 +6148,7 @@ typedef enum { nir_move_load_ssbo = (1 << 5), nir_move_load_uniform = (1 << 6), nir_move_alu = (1 << 7), + nir_dont_move_byte_word_vecs = (1 << 8), } nir_move_options; bool nir_can_move_instr(nir_instr *instr, nir_move_options options); diff --git a/src/compiler/nir/nir_opt_sink.c b/src/compiler/nir/nir_opt_sink.c index 28db929c5bb..5715c1cd118 100644 --- a/src/compiler/nir/nir_opt_sink.c +++ b/src/compiler/nir/nir_opt_sink.c @@ -70,8 +70,13 @@ can_sink_instr(nir_instr *instr, nir_move_options options, bool *can_mov_out_of_ case nir_instr_type_alu: { nir_alu_instr *alu = nir_instr_as_alu(instr); - if (nir_op_is_vec_or_mov(alu->op) || alu->op == nir_op_b2i32) + if (nir_op_is_vec_or_mov(alu->op) || alu->op == nir_op_b2i32) { + if (nir_op_is_vec(alu->op) && alu->def.bit_size < 32 && + (options & nir_dont_move_byte_word_vecs)) { + return false; + } return options & nir_move_copies; + } if (nir_alu_instr_is_comparison(alu)) return options & nir_move_comparisons;