mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-06 15:58:05 +02:00
nir/opt_constant_folding: constant-fold op(bcsel(), #c) -> bcsel(.., #c1, #c2)
for all ALU instructions except fneg instead of using nir_opt_algebraic for a small subset. Totals from 17711 (8.49% of 208640) affected shaders: (Navi48) MaxWaves: 364391 -> 364397 (+0.00%); split: +0.01%, -0.01% Instrs: 33873994 -> 33780398 (-0.28%); split: -0.31%, +0.03% CodeSize: 198627596 -> 198259724 (-0.19%); split: -0.23%, +0.05% VGPRs: 1435516 -> 1435144 (-0.03%); split: -0.04%, +0.02% SpillSGPRs: 652827 -> 654577 (+0.27%); split: -0.00%, +0.27% SpillVGPRs: 594840 -> 593598 (-0.21%); split: -0.28%, +0.07% Scratch: 31791360 -> 31543552 (-0.78%) Latency: 417824569 -> 415881858 (-0.46%); split: -0.48%, +0.02% InvThroughput: 80376232 -> 80307996 (-0.08%); split: -0.10%, +0.01% VClause: 557238 -> 554770 (-0.44%); split: -0.50%, +0.06% SClause: 688297 -> 688125 (-0.02%); split: -0.04%, +0.02% Copies: 3571756 -> 3566704 (-0.14%); split: -0.44%, +0.29% Branches: 628710 -> 628576 (-0.02%); split: -0.07%, +0.05% PreSGPRs: 1100316 -> 1103478 (+0.29%); split: -0.02%, +0.30% PreVGPRs: 1132139 -> 1128765 (-0.30%); split: -0.30%, +0.00% VALU: 18944830 -> 18912030 (-0.17%); split: -0.20%, +0.03% SALU: 4363054 -> 4342748 (-0.47%); split: -0.57%, +0.10% VMEM: 1894420 -> 1891754 (-0.14%); split: -0.19%, +0.05% SMEM: 1073860 -> 1073741 (-0.01%); split: -0.01%, +0.00% VOPD: 1734659 -> 1735718 (+0.06%); split: +0.20%, -0.14% Reviewed-by: Georg Lehmann <dadschoorse@gmail.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/40848>
This commit is contained in:
parent
8b1c60add4
commit
f4812dc11d
2 changed files with 85 additions and 6 deletions
|
|
@ -55,10 +55,41 @@ const_value_for_alu(nir_builder *b, nir_alu_instr *alu, unsigned bit_size,
|
|||
dest);
|
||||
}
|
||||
|
||||
static bool
|
||||
is_bcsel_with_two_constants(nir_alu_instr *bcsel)
|
||||
{
|
||||
return bcsel && bcsel->op == nir_op_bcsel &&
|
||||
bcsel->def.num_components == 1 &&
|
||||
bcsel->src[0].swizzle[0] == 0 &&
|
||||
bcsel->src[0].src.ssa->num_components == 1 &&
|
||||
nir_src_is_const(bcsel->src[1].src) &&
|
||||
nir_src_is_const(bcsel->src[2].src);
|
||||
}
|
||||
|
||||
static bool
|
||||
should_fold_bcsel(nir_alu_instr *alu)
|
||||
{
|
||||
/* Don't fold bcsel if the resulting bit size is larger than 32 bit
|
||||
* as these commonly require two instructions.
|
||||
*/
|
||||
if (alu->def.bit_size > 32)
|
||||
return false;
|
||||
|
||||
/* Don't fight with nir_lower_load_const_to_scalar. */
|
||||
if (nir_op_is_vec_or_mov(alu->op))
|
||||
return false;
|
||||
|
||||
/* Make an exception for fneg, because in many cases it can be
|
||||
* folded with the next instruction.
|
||||
*/
|
||||
return alu->op != nir_op_fneg;
|
||||
}
|
||||
|
||||
nir_def *
|
||||
nir_try_constant_fold_alu(nir_builder *b, nir_alu_instr *alu)
|
||||
{
|
||||
nir_const_value src[NIR_ALU_MAX_INPUTS][NIR_MAX_VEC_COMPONENTS];
|
||||
nir_def *bcsel = NULL;
|
||||
|
||||
/* In the case that any outputs/inputs have unsized types, then we need to
|
||||
* guess the bit-size. In this case, the validator ensures that all
|
||||
|
|
@ -79,19 +110,57 @@ nir_try_constant_fold_alu(nir_builder *b, nir_alu_instr *alu)
|
|||
bit_size = alu->src[i].src.ssa->bit_size;
|
||||
|
||||
nir_load_const_instr *load_const = nir_src_as_load_const(alu->src[i].src);
|
||||
if (!load_const)
|
||||
return NULL;
|
||||
|
||||
for (unsigned j = 0; j < nir_ssa_alu_instr_src_components(alu, i);
|
||||
j++) {
|
||||
src[i][j] = load_const->value[alu->src[i].swizzle[j]];
|
||||
if (load_const) {
|
||||
for (unsigned j = 0; j < nir_ssa_alu_instr_src_components(alu, i); j++)
|
||||
src[i][j] = load_const->value[alu->src[i].swizzle[j]];
|
||||
continue;
|
||||
}
|
||||
|
||||
/* Check if the source is a bcsel with two constants. */
|
||||
nir_alu_instr *bcsel_alu = nir_src_as_alu(alu->src[i].src);
|
||||
if (should_fold_bcsel(alu) && is_bcsel_with_two_constants(bcsel_alu)) {
|
||||
/* If there are multiple bcsel sources, they must use the same condition. */
|
||||
if (bcsel && bcsel_alu->src[0].src.ssa != bcsel)
|
||||
return false;
|
||||
|
||||
bcsel = bcsel_alu->src[0].src.ssa;
|
||||
|
||||
/* Use first bcsel constant. */
|
||||
load_const = nir_src_as_load_const(bcsel_alu->src[1].src);
|
||||
for (unsigned j = 0; j < nir_ssa_alu_instr_src_components(alu, i); j++)
|
||||
src[i][j] = load_const->value[bcsel_alu->src[1].swizzle[alu->src[i].swizzle[j]]];
|
||||
} else {
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
|
||||
if (bit_size == 0)
|
||||
bit_size = 32;
|
||||
|
||||
return const_value_for_alu(b, alu, bit_size, src);
|
||||
/* If all sources are constant, we can fold the ALU. */
|
||||
if (!bcsel)
|
||||
return const_value_for_alu(b, alu, bit_size, src);
|
||||
|
||||
/* At least one source is a bcsel with two constants. Fold the ALU twice
|
||||
* and create a new bcsel, selecting between the folded values.
|
||||
*/
|
||||
nir_def *then_const = const_value_for_alu(b, alu, bit_size, src);
|
||||
|
||||
/* Create second bcsel constant. */
|
||||
for (unsigned i = 0; i < nir_op_infos[alu->op].num_inputs; i++) {
|
||||
nir_alu_instr *bcsel_alu = nir_src_as_alu(alu->src[i].src);
|
||||
if (!bcsel_alu)
|
||||
continue;
|
||||
|
||||
nir_load_const_instr *load_const = nir_src_as_load_const(bcsel_alu->src[2].src);
|
||||
for (unsigned j = 0; j < nir_ssa_alu_instr_src_components(alu, i); j++) {
|
||||
src[i][j] = load_const->value[bcsel_alu->src[2].swizzle[alu->src[i].swizzle[j]]];
|
||||
}
|
||||
}
|
||||
nir_def *else_const = const_value_for_alu(b, alu, bit_size, src);
|
||||
|
||||
return nir_bcsel(b, bcsel, then_const, else_const);
|
||||
}
|
||||
|
||||
static nir_const_value *
|
||||
|
|
|
|||
|
|
@ -25,6 +25,7 @@
|
|||
#include <inttypes.h>
|
||||
#include "util/half_float.h"
|
||||
#include "nir_builder.h"
|
||||
#include "nir_opcodes.h"
|
||||
#include "nir_worklist.h"
|
||||
|
||||
/* This should be the same as nir_search_max_comm_ops in nir_algebraic.py. */
|
||||
|
|
@ -441,6 +442,15 @@ construct_value(nir_builder *build,
|
|||
if (const_expr) {
|
||||
nir_instr_free(&alu->instr);
|
||||
def = const_expr;
|
||||
if (nir_def_is_alu(def)) {
|
||||
/* The instruction got folded into bcsel of two constants. */
|
||||
nir_alu_instr *bcsel = nir_def_as_alu(def);
|
||||
assert(bcsel->op == nir_op_bcsel);
|
||||
util_dynarray_append_typed(state->states, uint16_t, 0);
|
||||
nir_algebraic_automaton(nir_src_parent_instr(&bcsel->src[1].src), state->states, state->pass_op_table);
|
||||
util_dynarray_append_typed(state->states, uint16_t, 0);
|
||||
nir_algebraic_automaton(nir_src_parent_instr(&bcsel->src[2].src), state->states, state->pass_op_table);
|
||||
}
|
||||
} else {
|
||||
nir_builder_instr_insert(build, &alu->instr);
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue