mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-03-16 14:50:42 +01:00
nir/loop_analyze: determine for all ALU whether it can be constant-folded
Totals from 16 (0.02% of 79839) affected shaders: (Navi48) MaxWaves: 512 -> 464 (-9.38%) Instrs: 11821 -> 17205 (+45.55%) CodeSize: 60536 -> 86644 (+43.13%) VGPRs: 732 -> 804 (+9.84%) Latency: 68411 -> 39349 (-42.48%) InvThroughput: 14217 -> 9306 (-34.54%) VClause: 223 -> 302 (+35.43%) SClause: 262 -> 317 (+20.99%) Copies: 961 -> 696 (-27.58%); split: -39.23%, +11.65% Branches: 182 -> 158 (-13.19%); split: -29.67%, +16.48% PreSGPRs: 1210 -> 945 (-21.90%); split: -29.42%, +7.52% PreVGPRs: 647 -> 633 (-2.16%) VALU: 5112 -> 10857 (+112.38%) SALU: 3215 -> 2335 (-27.37%); split: -30.67%, +3.30% VMEM: 228 -> 349 (+53.07%) SMEM: 567 -> 549 (-3.17%); split: -3.70%, +0.53% Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/38659>
This commit is contained in:
parent
1d6f85a207
commit
4997d8fb1b
2 changed files with 71 additions and 42 deletions
|
|
@ -48,21 +48,19 @@ get_loop_var(nir_def *value, loop_info_state *state)
|
|||
return NULL;
|
||||
}
|
||||
|
||||
/* If a condition is a comparision between a constant and
|
||||
* a basic induction variable we know that it will be eliminated once
|
||||
* the loop is unrolled.
|
||||
/* If an instruction only depends on basic induction variables
|
||||
* and constants, we know that it will be eliminated once the
|
||||
* loop is unrolled.
|
||||
*/
|
||||
static bool
|
||||
condition_can_constant_fold(loop_info_state *state, nir_scalar cond_scalar)
|
||||
is_const_after_unrolling(loop_info_state *state, nir_def *def)
|
||||
{
|
||||
nir_scalar lhs = nir_scalar_chase_alu_src(cond_scalar, 0);
|
||||
nir_scalar rhs = nir_scalar_chase_alu_src(cond_scalar, 1);
|
||||
nir_instr *instr = nir_def_instr(def);
|
||||
if (instr->pass_flags == 0)
|
||||
return false;
|
||||
|
||||
if (nir_scalar_is_const(lhs) && get_loop_var(rhs.def, state))
|
||||
return true;
|
||||
if (nir_scalar_is_const(rhs) && get_loop_var(lhs.def, state))
|
||||
return true;
|
||||
return false;
|
||||
/* The pass flags are only correct within the loop. */
|
||||
return instr->block->index >= nir_loop_first_block(state->loop)->index;
|
||||
}
|
||||
|
||||
/** Calculate an estimated cost in number of instructions
|
||||
|
|
@ -87,29 +85,15 @@ instr_cost(loop_info_state *state, nir_instr *instr,
|
|||
const nir_op_info *info = &nir_op_infos[alu->op];
|
||||
unsigned cost = 1;
|
||||
|
||||
if (nir_op_is_selection(alu->op)) {
|
||||
bool can_constant_fold = true;
|
||||
for (unsigned i = 0; can_constant_fold && i < alu->def.num_components; i++) {
|
||||
nir_scalar cond_scalar = nir_scalar_chase_alu_src(nir_get_scalar(&alu->def, i), 0);
|
||||
can_constant_fold &= nir_is_terminator_condition_with_two_inputs(cond_scalar) &&
|
||||
condition_can_constant_fold(state, cond_scalar);
|
||||
}
|
||||
/* Check if this instruction can be constant-folded after unrolling. */
|
||||
if (is_const_after_unrolling(state, &alu->def))
|
||||
return 0;
|
||||
|
||||
if (nir_op_is_selection(alu->op) && is_const_after_unrolling(state, alu->src[0].src.ssa)) {
|
||||
/* If the condition can be constant folded after the loop is unrolled,
|
||||
* so can the selection.
|
||||
*/
|
||||
if (can_constant_fold)
|
||||
return 0;
|
||||
} else if (nir_alu_instr_is_comparison(alu) &&
|
||||
nir_op_infos[alu->op].num_inputs == 2) {
|
||||
bool can_constant_fold = true;
|
||||
for (unsigned i = 0; can_constant_fold && i < alu->def.num_components; i++) {
|
||||
nir_scalar cond_scalar = nir_get_scalar(&alu->def, i);
|
||||
can_constant_fold &= condition_can_constant_fold(state, cond_scalar);
|
||||
}
|
||||
|
||||
if (can_constant_fold)
|
||||
return 0;
|
||||
return 0;
|
||||
} else if (nir_op_is_vec_or_mov(alu->op)) {
|
||||
/* movs and vecs are likely free. */
|
||||
return 0;
|
||||
|
|
@ -1386,6 +1370,57 @@ force_unroll_heuristics(loop_info_state *state, nir_block *block)
|
|||
return false;
|
||||
}
|
||||
|
||||
static void
|
||||
gather_constant_fold_info(loop_info_state *state, nir_instr *instr)
|
||||
{
|
||||
instr->pass_flags = 0;
|
||||
|
||||
/* Loop induction variables with constant initializer and constant
|
||||
* update source get constant-folded when the loop is being unrolled.
|
||||
*/
|
||||
if (instr->type == nir_instr_type_phi &&
|
||||
instr->block == nir_loop_first_block(state->loop)) {
|
||||
nir_loop_induction_variable *var = get_loop_var(nir_instr_def(instr), state);
|
||||
|
||||
instr->pass_flags = var && nir_def_is_const(var->init_src->ssa) &&
|
||||
nir_def_is_const(var->update_src->src.ssa);
|
||||
}
|
||||
|
||||
if (instr->type != nir_instr_type_alu)
|
||||
return;
|
||||
|
||||
/* ALU instruction which only depend on constants and constant-foldable
|
||||
* sources, can also be constant-folded.
|
||||
*/
|
||||
nir_alu_instr *alu = nir_instr_as_alu(instr);
|
||||
for (unsigned i = 0; i < nir_op_infos[alu->op].num_inputs; i++) {
|
||||
if(!nir_src_is_const(alu->src[i].src) &&
|
||||
!is_const_after_unrolling(state, alu->src[i].src.ssa))
|
||||
return;
|
||||
}
|
||||
|
||||
instr->pass_flags = 1;
|
||||
}
|
||||
|
||||
static void
|
||||
gather_unroll_heuristic_info(loop_info_state *state, const nir_shader_compiler_options *options)
|
||||
{
|
||||
nir_foreach_block_in_cf_node(block, &state->loop->cf_node) {
|
||||
/* Calculate instruction cost. */
|
||||
nir_foreach_instr(instr, block) {
|
||||
gather_constant_fold_info(state, instr);
|
||||
state->loop->info->instr_cost += instr_cost(state, instr, options);
|
||||
}
|
||||
|
||||
if (state->loop->info->force_unroll)
|
||||
continue;
|
||||
|
||||
if (force_unroll_heuristics(state, block)) {
|
||||
state->loop->info->force_unroll = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
get_loop_info(loop_info_state *state, nir_function_impl *impl)
|
||||
{
|
||||
|
|
@ -1413,18 +1448,7 @@ get_loop_info(loop_info_state *state, nir_function_impl *impl)
|
|||
impl->function->shader->info.float_controls_execution_mode,
|
||||
impl->function->shader->options->max_unroll_iterations);
|
||||
|
||||
nir_foreach_block_in_cf_node(block, &state->loop->cf_node) {
|
||||
nir_foreach_instr(instr, block) {
|
||||
state->loop->info->instr_cost += instr_cost(state, instr, options);
|
||||
}
|
||||
|
||||
if (state->loop->info->force_unroll)
|
||||
continue;
|
||||
|
||||
if (force_unroll_heuristics(state, block)) {
|
||||
state->loop->info->force_unroll = true;
|
||||
}
|
||||
}
|
||||
gather_unroll_heuristic_info(state, options);
|
||||
}
|
||||
|
||||
static void
|
||||
|
|
@ -1484,6 +1508,7 @@ nir_loop_analyze_impl(nir_function_impl *impl,
|
|||
bool force_unroll_sampler_indirect)
|
||||
{
|
||||
struct hash_table *range_ht = _mesa_pointer_hash_table_create(NULL);
|
||||
nir_metadata_require(impl, nir_metadata_block_index);
|
||||
|
||||
foreach_list_typed(nir_cf_node, node, node, &impl->body)
|
||||
process_loops(node, indirect_mask, force_unroll_sampler_indirect, range_ht);
|
||||
|
|
|
|||
|
|
@ -106,3 +106,7 @@ dEQP-VK.memory.mapping.dedicated_alloc.buffer.full.variable.implicit_unmap
|
|||
dEQP-VK.memory.mapping.dedicated_alloc.buffer.full.variable.implicit_unmap_map2
|
||||
dEQP-VK.memory.mapping.dedicated_alloc.image.full.variable.implicit_unmap
|
||||
dEQP-VK.memory.mapping.dedicated_alloc.image.full.variable.implicit_unmap_map2
|
||||
|
||||
# Generates too many immediates due to loop unrolling
|
||||
KHR-GLES31.core.geometry_shader.limits.max_output_components
|
||||
KHR-GL46.geometry_shader.limits.max_output_components
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue