nir,radv: add an option to not move 8/16bit vecs

ACO will overestimate the register demand of the 8/16-bit sources of a
vec, so we don't want nir_opt_sink/nir_opt_move to move such vecs and
create the vector later than necessary.

Foz-DB Navi48:
Totals from 240 (0.30% of 80265) affected shaders:
MaxWaves: 6429 -> 6435 (+0.09%)
Instrs: 3406069 -> 3406646 (+0.02%); split: -0.01%, +0.03%
CodeSize: 18231596 -> 18233288 (+0.01%); split: -0.01%, +0.02%
VGPRs: 14768 -> 14732 (-0.24%)
Latency: 18981274 -> 18979170 (-0.01%); split: -0.02%, +0.01%
InvThroughput: 4247331 -> 4246634 (-0.02%); split: -0.02%, +0.01%
VClause: 85453 -> 85458 (+0.01%); split: -0.01%, +0.01%
Copies: 262046 -> 261971 (-0.03%); split: -0.06%, +0.03%
PreVGPRs: 10899 -> 10775 (-1.14%)
VALU: 1923441 -> 1923485 (+0.00%); split: -0.01%, +0.01%
SALU: 457983 -> 457982 (-0.00%)
VOPD: 4980 -> 4861 (-2.39%); split: +0.48%, -2.87%

Reviewed-by: Alyssa Rosenzweig <alyssa@rosenzweig.io>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/35729>
This commit is contained in:
Georg Lehmann 2025-06-24 22:31:39 +02:00 committed by Marge Bot
parent 7ac9a87572
commit 7de352e99e
3 changed files with 10 additions and 4 deletions

View file

@ -416,7 +416,7 @@ radv_postprocess_nir(struct radv_device *device, const struct radv_graphics_stat
NIR_PASS(_, stage->nir, nir_lower_alu_width, opt_vectorize_callback, device);
nir_move_options sink_opts = nir_move_const_undef | nir_move_copies;
nir_move_options sink_opts = nir_move_const_undef | nir_move_copies | nir_dont_move_byte_word_vecs;
if (!stage->key.optimisations_disabled) {
NIR_PASS(_, stage->nir, nir_opt_licm);
@ -424,7 +424,7 @@ radv_postprocess_nir(struct radv_device *device, const struct radv_graphics_stat
sink_opts |= nir_move_load_input;
NIR_PASS(_, stage->nir, nir_opt_sink, sink_opts);
NIR_PASS(_, stage->nir, nir_opt_move, nir_move_load_input | nir_move_const_undef | nir_move_copies);
NIR_PASS(_, stage->nir, nir_opt_move, sink_opts | nir_move_load_input);
}
/* Lower VS inputs. We need to do this after nir_opt_sink, because
@ -625,7 +625,7 @@ radv_postprocess_nir(struct radv_device *device, const struct radv_graphics_stat
NIR_PASS(_, stage->nir, nir_opt_sink, sink_opts);
nir_move_options move_opts = nir_move_const_undef | nir_move_load_ubo | nir_move_load_input |
nir_move_comparisons | nir_move_copies | nir_move_alu;
nir_move_comparisons | nir_move_copies | nir_dont_move_byte_word_vecs | nir_move_alu;
NIR_PASS(_, stage->nir, nir_opt_move, move_opts);
/* Run nir_opt_move again to make sure that comparisons are as close as possible to the first use to prevent SCC

View file

@ -6148,6 +6148,7 @@ typedef enum {
nir_move_load_ssbo = (1 << 5),
nir_move_load_uniform = (1 << 6),
nir_move_alu = (1 << 7),
nir_dont_move_byte_word_vecs = (1 << 8),
} nir_move_options;
bool nir_can_move_instr(nir_instr *instr, nir_move_options options);

View file

@ -70,8 +70,13 @@ can_sink_instr(nir_instr *instr, nir_move_options options, bool *can_mov_out_of_
case nir_instr_type_alu: {
nir_alu_instr *alu = nir_instr_as_alu(instr);
if (nir_op_is_vec_or_mov(alu->op) || alu->op == nir_op_b2i32)
if (nir_op_is_vec_or_mov(alu->op) || alu->op == nir_op_b2i32) {
if (nir_op_is_vec(alu->op) && alu->def.bit_size < 32 &&
(options & nir_dont_move_byte_word_vecs)) {
return false;
}
return options & nir_move_copies;
}
if (nir_alu_instr_is_comparison(alu))
return options & nir_move_comparisons;