agx: call agx_nir_lower_sample_mask earlier

A given sample needs depth testing to happen before writing its colour, which
requires shuffling pass order.

To do so, merge agx_nir_opt_ixor_bcsel into the regular late algebraic pass,
which is now possible and is actually a small shader-db win.

Closes https://gitlab.freedesktop.org/asahi/mesa/-/issues/30

Signed-off-by: Alyssa Rosenzweig <alyssa@rosenzweig.io>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/27616>
This commit is contained in:
Alyssa Rosenzweig 2024-02-06 21:44:21 -04:00 committed by Marge Bot
parent 9cb03ba5c7
commit 27ddcea2b3
5 changed files with 6 additions and 21 deletions

View file

@ -3154,25 +3154,6 @@ agx_compile_shader_nir(nir_shader *nir, struct agx_shader_key *key,
out->push_count = key->reserved_preamble;
agx_optimize_nir(nir, &out->push_count);
/* Create sample_mask instructions late, since NIR's scheduling is not aware
* of the ordering requirements between sample_mask and pixel stores.
*
* Note: when epilogs are used, special handling is required since the sample
* count is dynamic when the main fragment shader is compiled.
*/
if (nir->info.stage == MESA_SHADER_FRAGMENT && key->fs.nr_samples) {
if (agx_nir_lower_sample_mask(nir)) {
/* Clean up ixor(bcsel) patterns created from sample mask lowering.
* Also constant fold to get the benefit. We need to rescalarize after
* folding constants.
*/
NIR_PASS(_, nir, agx_nir_opt_ixor_bcsel);
NIR_PASS(_, nir, nir_opt_constant_folding);
NIR_PASS(_, nir, nir_lower_load_const_to_scalar);
NIR_PASS(_, nir, nir_opt_dce);
}
}
/* Must be last since NIR passes can remap driver_location freely */
if (nir->info.stage == MESA_SHADER_VERTEX)
agx_remap_varyings_vs(nir, &out->varyings.vs, key);

View file

@ -256,6 +256,7 @@ void agx_preprocess_nir(nir_shader *nir, const nir_shader *libagx,
struct agx_uncompiled_shader_info *out);
bool agx_nir_lower_discard_zs_emit(nir_shader *s);
bool agx_nir_lower_sample_mask(nir_shader *s);
bool agx_nir_lower_cull_distance_fs(struct nir_shader *s,
unsigned nr_distances);

View file

@ -926,7 +926,6 @@ void agx_emit_parallel_copies(agx_builder *b, struct agx_copy *copies,
void agx_compute_liveness(agx_context *ctx);
void agx_liveness_ins_update(BITSET_WORD *live, agx_instr *I);
bool agx_nir_lower_sample_mask(nir_shader *s);
bool agx_nir_lower_texture(nir_shader *s);
bool agx_nir_opt_preamble(nir_shader *s, unsigned *preamble_size);
bool agx_nir_lower_load_mask(nir_shader *shader);

View file

@ -193,7 +193,8 @@ def run():
lower_sm5_shift + lower_pack +
lower_selects).render())
print(nir_algebraic.AlgebraicPass("agx_nir_fuse_algebraic_late",
fuse_extr + fuse_ubfe + fuse_imad).render())
fuse_extr + fuse_ubfe + fuse_imad +
ixor_bcsel).render())
print(nir_algebraic.AlgebraicPass("agx_nir_opt_ixor_bcsel",
ixor_bcsel).render())

View file

@ -4,6 +4,7 @@
* SPDX-License-Identifier: MIT
*/
#include "agx_compile.h"
#include "agx_tilebuffer.h"
#include "nir.h"
#include "nir_builder.h"
@ -187,6 +188,8 @@ agx_nir_lower_monolithic_msaa(nir_shader *shader, struct agx_msaa_state *state)
nir_metadata_block_index | nir_metadata_dominance,
&state->nr_samples);
agx_nir_lower_sample_mask(shader);
/* In single sampled programs, interpolateAtSample needs to return the
* center pixel. TODO: Generalize for dynamic sample count.
*/