mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-05 20:28:04 +02:00
agx: Refuse to handle discontiguous iter
This will cause problems with register allocation.

instructions HURT: shaders/glmark/1-24.shader_test MESA_SHADER_FRAGMENT: 135 -> 136 (0.74%)
instructions HURT: shaders/glmark/1-8.shader_test MESA_SHADER_FRAGMENT: 84 -> 85 (1.19%)

bytes HURT: shaders/glmark/1-24.shader_test MESA_SHADER_FRAGMENT: 914 -> 922 (0.88%)
bytes HURT: shaders/glmark/1-8.shader_test MESA_SHADER_FRAGMENT: 574 -> 580 (1.05%)

halfregs helped: shaders/glmark/1-8.shader_test MESA_SHADER_FRAGMENT: 20 -> 19 (-5.00%)
halfregs helped: shaders/glmark/1-24.shader_test MESA_SHADER_FRAGMENT: 25 -> 23 (-8.00%)
halfregs helped: shaders/glmark/7-3.shader_test MESA_SHADER_FRAGMENT: 11 -> 10 (-9.09%)
halfregs helped: shaders/glmark/4-2.shader_test MESA_SHADER_FRAGMENT: 23 -> 19 (-17.39%)

total instructions in shared programs: 5716 -> 5718 (0.03%)
instructions in affected programs: 219 -> 221 (0.91%)
helped: 0
HURT: 2

total bytes in shared programs: 38118 -> 38132 (0.04%)
bytes in affected programs: 1488 -> 1502 (0.94%)
helped: 0
HURT: 2

total halfregs in shared programs: 1639 -> 1631 (-0.49%)
halfregs in affected programs: 79 -> 71 (-10.13%)
helped: 4
HURT: 0
helped stats (abs) min: 1.0 max: 4.0 x̄: 2.00 x̃: 1
helped stats (rel) min: 5.00% max: 17.39% x̄: 9.87% x̃: 8.55%
95% mean confidence interval for halfregs value: -4.25 0.25
95% mean confidence interval for halfregs %-change: -18.31% -1.43%
Inconclusive result (value mean confidence interval includes 0).

Total CPU time (seconds): 11.41 -> 11.72 (2.72%)

Signed-off-by: Alyssa Rosenzweig <alyssa@rosenzweig.io>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/19590>
This commit is contained in:
parent
af2137883c
commit
9a48c35668
4 changed files with 96 additions and 1 deletions
|
|
@ -441,12 +441,17 @@ agx_emit_load_vary(agx_builder *b, agx_index dest, nir_intrinsic_instr *instr)
|
|||
nir_src *offset = nir_get_io_offset_src(instr);
|
||||
assert(nir_src_is_const(*offset) && "no indirects");
|
||||
|
||||
assert(nir_ssa_def_components_read(&instr->dest.ssa) ==
|
||||
nir_component_mask(components) &&
|
||||
"iter does not handle write-after-write hazards");
|
||||
|
||||
/* For perspective interpolation, we need W */
|
||||
agx_index J = !perspective ? agx_zero() :
|
||||
agx_get_cf(b->shader, true, false, VARYING_SLOT_POS, 3, 1);
|
||||
|
||||
agx_index I = agx_get_cf(b->shader, true, perspective,
|
||||
sem.location + nir_src_as_uint(*offset), 0,
|
||||
sem.location + nir_src_as_uint(*offset),
|
||||
nir_intrinsic_component(instr),
|
||||
components);
|
||||
|
||||
agx_iter_to(b, dest, I, J, components, perspective);
|
||||
|
|
@ -1687,6 +1692,11 @@ agx_optimize_nir(nir_shader *nir, unsigned *preamble_size)
|
|||
NIR_PASS_V(nir, agx_nir_opt_preamble, preamble_size);
|
||||
NIR_PASS_V(nir, nir_opt_algebraic_late);
|
||||
NIR_PASS_V(nir, nir_opt_constant_folding);
|
||||
|
||||
/* Must run after uses are fixed but before a last round of copyprop + DCE */
|
||||
if (nir->info.stage == MESA_SHADER_FRAGMENT)
|
||||
NIR_PASS_V(nir, agx_nir_lower_load_mask);
|
||||
|
||||
NIR_PASS_V(nir, nir_copy_prop);
|
||||
NIR_PASS_V(nir, nir_opt_dce);
|
||||
NIR_PASS_V(nir, nir_opt_cse);
|
||||
|
|
|
|||
|
|
@ -791,6 +791,7 @@ void agx_liveness_ins_update(BITSET_WORD *live, agx_instr *I);
|
|||
bool agx_lower_resinfo(nir_shader *s);
|
||||
bool agx_nir_lower_array_texture(nir_shader *s);
|
||||
bool agx_nir_opt_preamble(nir_shader *s, unsigned *preamble_size);
|
||||
bool agx_nir_lower_load_mask(nir_shader *shader);
|
||||
|
||||
#ifdef __cplusplus
|
||||
} /* extern C */
|
||||
|
|
|
|||
83
src/asahi/compiler/agx_nir_lower_load_mask.c
Normal file
83
src/asahi/compiler/agx_nir_lower_load_mask.c
Normal file
|
|
@ -0,0 +1,83 @@
|
|||
/*
|
||||
* Copyright 2022 Alyssa Rosenzweig
|
||||
* SPDX-License-Identifier: MIT
|
||||
*/
|
||||
|
||||
#include "agx_compiler.h"
|
||||
#include "compiler/nir/nir_builder.h"
|
||||
|
||||
/*
|
||||
* Lower load_interpolated_input instructions with unused components of their
|
||||
* destination, duplicating the intrinsic and shrinking to avoid the holes.
|
||||
* load_interpolated_input becomes iter instructions, which lack a write mask.
|
||||
*/
|
||||
static bool
|
||||
pass(struct nir_builder *b, nir_instr *instr, UNUSED void *data)
|
||||
{
|
||||
if (instr->type != nir_instr_type_intrinsic)
|
||||
return false;
|
||||
|
||||
nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
|
||||
if (intr->intrinsic != nir_intrinsic_load_interpolated_input)
|
||||
return false;
|
||||
|
||||
unsigned mask = nir_ssa_def_components_read(&intr->dest.ssa);
|
||||
if (mask == 0 || mask == nir_component_mask(intr->num_components))
|
||||
return false;
|
||||
|
||||
b->cursor = nir_before_instr(instr);
|
||||
unsigned bit_size = nir_dest_bit_size(intr->dest);
|
||||
nir_ssa_def *comps[4] = { NULL };
|
||||
|
||||
for (unsigned c = 0; c < intr->num_components; ++c) {
|
||||
if (mask & BITFIELD_BIT(c)) {
|
||||
/* Count contiguous components to combine with */
|
||||
unsigned next_mask = mask >> c;
|
||||
unsigned next_zero = ffs(~next_mask);
|
||||
unsigned count = next_zero - 1;
|
||||
|
||||
assert(next_zero >= 2);
|
||||
assert(count >= 1);
|
||||
|
||||
nir_instr *clone = nir_instr_clone(b->shader, instr);
|
||||
nir_intrinsic_instr *clone_intr = nir_instr_as_intrinsic(clone);
|
||||
|
||||
/* Shrink the load to count contiguous components */
|
||||
nir_ssa_dest_init(clone, &clone_intr->dest, count, bit_size, NULL);
|
||||
nir_ssa_def *clone_vec = &clone_intr->dest.ssa;
|
||||
clone_intr->num_components = count;
|
||||
|
||||
/* The load starts from component c relative to the original load */
|
||||
nir_intrinsic_set_component(clone_intr,
|
||||
nir_intrinsic_component(intr) + c);
|
||||
|
||||
nir_builder_instr_insert(b, &clone_intr->instr);
|
||||
|
||||
/* The destination is a vector with `count` components, extract the
|
||||
* components so we can recombine into the final vector.
|
||||
*/
|
||||
for (unsigned d = 0; d < count; ++d)
|
||||
comps[c + d] = nir_channel(b, clone_vec, d);
|
||||
|
||||
c += (count - 1);
|
||||
} else {
|
||||
/* The value of unused components is irrelevant, but use an undef for
|
||||
* semantics. It will be eliminated by DCE after copyprop.
|
||||
*/
|
||||
comps[c] = nir_ssa_undef(b, 1, bit_size);
|
||||
}
|
||||
}
|
||||
|
||||
nir_ssa_def_rewrite_uses(&intr->dest.ssa,
|
||||
nir_vec(b, comps, intr->num_components));
|
||||
return true;
|
||||
}
|
||||
|
||||
bool
|
||||
agx_nir_lower_load_mask(nir_shader *shader)
|
||||
{
|
||||
return nir_shader_instructions_pass(shader, pass,
|
||||
nir_metadata_block_index |
|
||||
nir_metadata_dominance,
|
||||
NULL);
|
||||
}
|
||||
|
|
@ -24,6 +24,7 @@ libasahi_agx_files = files(
|
|||
'agx_dce.c',
|
||||
'agx_liveness.c',
|
||||
'agx_nir_lower_array_texture.c',
|
||||
'agx_nir_lower_load_mask.c',
|
||||
'agx_nir_opt_preamble.c',
|
||||
'agx_lower_64bit.c',
|
||||
'agx_lower_resinfo.c',
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue