agx: Refuse to handle discontiguous iter

Handling an iter whose destination components are discontiguous will cause problems with register allocation.

instructions HURT:   shaders/glmark/1-24.shader_test MESA_SHADER_FRAGMENT: 135 -> 136 (0.74%)
instructions HURT:   shaders/glmark/1-8.shader_test MESA_SHADER_FRAGMENT: 84 -> 85 (1.19%)

bytes HURT:   shaders/glmark/1-24.shader_test MESA_SHADER_FRAGMENT: 914 -> 922 (0.88%)
bytes HURT:   shaders/glmark/1-8.shader_test MESA_SHADER_FRAGMENT: 574 -> 580 (1.05%)

halfregs helped:   shaders/glmark/1-8.shader_test MESA_SHADER_FRAGMENT: 20 -> 19 (-5.00%)
halfregs helped:   shaders/glmark/1-24.shader_test MESA_SHADER_FRAGMENT: 25 -> 23 (-8.00%)
halfregs helped:   shaders/glmark/7-3.shader_test MESA_SHADER_FRAGMENT: 11 -> 10 (-9.09%)
halfregs helped:   shaders/glmark/4-2.shader_test MESA_SHADER_FRAGMENT: 23 -> 19 (-17.39%)

total instructions in shared programs: 5716 -> 5718 (0.03%)
instructions in affected programs: 219 -> 221 (0.91%)
helped: 0
HURT: 2

total bytes in shared programs: 38118 -> 38132 (0.04%)
bytes in affected programs: 1488 -> 1502 (0.94%)
helped: 0
HURT: 2

total halfregs in shared programs: 1639 -> 1631 (-0.49%)
halfregs in affected programs: 79 -> 71 (-10.13%)
helped: 4
HURT: 0
helped stats (abs) min: 1.0 max: 4.0 x̄: 2.00 x̃: 1
helped stats (rel) min: 5.00% max: 17.39% x̄: 9.87% x̃: 8.55%
95% mean confidence interval for halfregs value: -4.25 0.25
95% mean confidence interval for halfregs %-change: -18.31% -1.43%
Inconclusive result (value mean confidence interval includes 0).

Total CPU time (seconds): 11.41 -> 11.72 (2.72%)

Signed-off-by: Alyssa Rosenzweig <alyssa@rosenzweig.io>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/19590>
This commit is contained in:
Alyssa Rosenzweig 2022-10-24 22:30:46 -04:00 committed by Marge Bot
parent af2137883c
commit 9a48c35668
4 changed files with 96 additions and 1 deletions

View file

@ -441,12 +441,17 @@ agx_emit_load_vary(agx_builder *b, agx_index dest, nir_intrinsic_instr *instr)
nir_src *offset = nir_get_io_offset_src(instr);
assert(nir_src_is_const(*offset) && "no indirects");
assert(nir_ssa_def_components_read(&instr->dest.ssa) ==
nir_component_mask(components) &&
"iter does not handle write-after-write hazards");
/* For perspective interpolation, we need W */
agx_index J = !perspective ? agx_zero() :
agx_get_cf(b->shader, true, false, VARYING_SLOT_POS, 3, 1);
agx_index I = agx_get_cf(b->shader, true, perspective,
sem.location + nir_src_as_uint(*offset), 0,
sem.location + nir_src_as_uint(*offset),
nir_intrinsic_component(instr),
components);
agx_iter_to(b, dest, I, J, components, perspective);
@ -1687,6 +1692,11 @@ agx_optimize_nir(nir_shader *nir, unsigned *preamble_size)
NIR_PASS_V(nir, agx_nir_opt_preamble, preamble_size);
NIR_PASS_V(nir, nir_opt_algebraic_late);
NIR_PASS_V(nir, nir_opt_constant_folding);
/* Must run after uses are fixed but before a last round of copyprop + DCE */
if (nir->info.stage == MESA_SHADER_FRAGMENT)
NIR_PASS_V(nir, agx_nir_lower_load_mask);
NIR_PASS_V(nir, nir_copy_prop);
NIR_PASS_V(nir, nir_opt_dce);
NIR_PASS_V(nir, nir_opt_cse);

View file

@ -791,6 +791,7 @@ void agx_liveness_ins_update(BITSET_WORD *live, agx_instr *I);
bool agx_lower_resinfo(nir_shader *s);
bool agx_nir_lower_array_texture(nir_shader *s);
bool agx_nir_opt_preamble(nir_shader *s, unsigned *preamble_size);
bool agx_nir_lower_load_mask(nir_shader *shader);
#ifdef __cplusplus
} /* extern C */

View file

@ -0,0 +1,83 @@
/*
* Copyright 2022 Alyssa Rosenzweig
* SPDX-License-Identifier: MIT
*/
#include "agx_compiler.h"
#include "compiler/nir/nir_builder.h"
/*
 * load_interpolated_input turns into iter instructions, and iter has no write
 * mask. When only some components of such a load are read, replace the load
 * with one narrower copy per contiguous run of read components, so that none of
 * the emitted loads has a hole in its destination.
 *
 * Returns true if the instruction was rewritten, false if it was left alone.
 */
static bool
pass(struct nir_builder *b, nir_instr *instr, UNUSED void *data)
{
   if (instr->type != nir_instr_type_intrinsic)
      return false;

   nir_intrinsic_instr *load = nir_instr_as_intrinsic(instr);
   if (load->intrinsic != nir_intrinsic_load_interpolated_input)
      return false;

   /* Leave the load alone if nothing reads it or if everything is read */
   const unsigned read = nir_ssa_def_components_read(&load->dest.ssa);
   if (read == 0)
      return false;
   if (read == nir_component_mask(load->num_components))
      return false;

   /* Replacement instructions are emitted just ahead of the original load */
   b->cursor = nir_before_instr(instr);

   const unsigned bit_size = nir_dest_bit_size(load->dest);
   const unsigned nr_comps = load->num_components;
   nir_ssa_def *channels[4] = { NULL };

   unsigned start = 0;
   while (start < nr_comps) {
      if ((read & BITFIELD_BIT(start)) == 0) {
         /* Nobody reads this component, so its value does not matter. Fill
          * the slot with an undef to keep the rebuilt vector well-formed; it
          * is expected to be removed by DCE after copyprop.
          */
         channels[start] = nir_ssa_undef(b, 1, bit_size);
         start += 1;
         continue;
      }

      /* Length of the run of read components beginning at `start`: after
       * shifting the mask down, ffs of the complement gives the 1-based
       * position of the lowest clear bit.
       */
      const unsigned first_clear = ffs(~(read >> start));
      const unsigned run = first_clear - 1;

      assert(first_clear >= 2);
      assert(run >= 1);

      nir_instr *copy = nir_instr_clone(b->shader, instr);
      nir_intrinsic_instr *narrow = nir_instr_as_intrinsic(copy);

      /* Give the copy a fresh destination that is only `run` components wide */
      nir_ssa_dest_init(copy, &narrow->dest, run, bit_size, NULL);
      narrow->num_components = run;

      /* The copy begins `start` components past where the original began */
      nir_intrinsic_set_component(narrow,
                                  nir_intrinsic_component(load) + start);

      nir_builder_instr_insert(b, &narrow->instr);

      /* Split the narrow result into scalars so they can be placed back at
       * their original positions in the full-width vector.
       */
      for (unsigned i = 0; i < run; ++i)
         channels[start + i] = nir_channel(b, &narrow->dest.ssa, i);

      start += run;
   }

   /* Rebuild a vector of the original width and redirect every use to it.
    * The original load is not removed here; it is simply left without uses.
    */
   nir_ssa_def *repacked = nir_vec(b, channels, nr_comps);
   nir_ssa_def_rewrite_uses(&load->dest.ssa, repacked);

   return true;
}
/*
 * Apply the load-mask lowering callback to every instruction of the shader.
 * Block indices and dominance information are declared as preserved. The
 * return value is passed straight through from nir_shader_instructions_pass.
 */
bool
agx_nir_lower_load_mask(nir_shader *shader)
{
   const nir_metadata preserved =
      nir_metadata_block_index | nir_metadata_dominance;

   return nir_shader_instructions_pass(shader, pass, preserved, NULL);
}

View file

@ -24,6 +24,7 @@ libasahi_agx_files = files(
'agx_dce.c',
'agx_liveness.c',
'agx_nir_lower_array_texture.c',
'agx_nir_lower_load_mask.c',
'agx_nir_opt_preamble.c',
'agx_lower_64bit.c',
'agx_lower_resinfo.c',