agx: handle discard with forced early tests

We need to predicate the store, since we can't do a hardware demote after
running the tests. This is similar to what the blob driver does.

Signed-off-by: Alyssa Rosenzweig <alyssa@rosenzweig.io>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/29607>
This commit is contained in:
Alyssa Rosenzweig 2024-04-27 11:30:15 -04:00 committed by Marge Bot
parent 1dfb461552
commit 65e64b6e2d
6 changed files with 91 additions and 16 deletions

View file

@ -71,6 +71,8 @@ the fragment epilog):
depth and/or stencil are written by the fragment shader. Depth/stencil writes
must be deferred to the epilog for correctness when the epilog can discard
(i.e. when alpha-to-coverage is enabled).
* `r3h` contains the logically emitted sample mask, if the fragment shader uses
forced early tests. This predicates the epilog's stores.
* The vec4 of 32-bit registers beginning at `r(4 * (i + 1))` contains the colour
output for render target `i`. When dual source blending is enabled, there is
only a single render target and the dual source colour is treated as the

View file

@ -35,7 +35,7 @@ agx_compile_bg_eot_shader(struct agx_bg_eot_cache *cache, nir_shader *shader,
agx_preprocess_nir(shader, cache->dev->libagx);
if (tib) {
unsigned bindless_base = 0;
agx_nir_lower_tilebuffer(shader, tib, NULL, &bindless_base, NULL);
agx_nir_lower_tilebuffer(shader, tib, NULL, &bindless_base, NULL, NULL);
agx_nir_lower_monolithic_msaa(shader, tib->nr_samples);
agx_nir_lower_multisampled_image_store(shader);
agx_nir_lower_texture(shader);

View file

@ -25,6 +25,7 @@ struct ctx {
unsigned bindless_base;
bool any_memory_stores;
uint8_t outputs_written;
nir_def *write_samples;
};
static bool
@ -46,7 +47,8 @@ tib_filter(const nir_instr *instr, UNUSED const void *_)
static void
store_tilebuffer(nir_builder *b, struct agx_tilebuffer_layout *tib,
enum pipe_format format, enum pipe_format logical_format,
unsigned rt, nir_def *value, unsigned write_mask)
unsigned rt, nir_def *value, nir_def *samples,
unsigned write_mask)
{
/* The hardware cannot extend for a 32-bit format. Extend ourselves. */
if (format == PIPE_FORMAT_R32_UINT && value->bit_size == 16) {
@ -84,10 +86,12 @@ store_tilebuffer(nir_builder *b, struct agx_tilebuffer_layout *tib,
value = nir_u2u16(b, value);
}
if (!samples)
samples = nir_imm_intN_t(b, ALL_SAMPLES, 16);
uint8_t offset_B = agx_tilebuffer_offset_B(tib, rt);
nir_store_local_pixel_agx(b, value, nir_imm_intN_t(b, ALL_SAMPLES, 16),
.base = offset_B, .write_mask = write_mask,
.format = format);
nir_store_local_pixel_agx(b, value, samples, .base = offset_B,
.write_mask = write_mask, .format = format);
}
static nir_def *
@ -181,7 +185,8 @@ image_coords(nir_builder *b)
static void
store_memory(nir_builder *b, unsigned bindless_base, unsigned nr_samples,
enum pipe_format format, unsigned rt, nir_def *value)
enum pipe_format format, unsigned rt, nir_def *value,
nir_def *samples)
{
nir_def *image = handle_for_rt(b, bindless_base, rt, true);
nir_def *tex_image = handle_for_rt(b, bindless_base, rt, false);
@ -216,10 +221,16 @@ store_memory(nir_builder *b, unsigned bindless_base, unsigned nr_samples,
if (nr_samples > 1) {
nir_def *coverage = nir_load_sample_mask(b);
if (samples != NULL)
coverage = nir_iand(b, coverage, samples);
nir_def *covered = nir_ubitfield_extract(
b, coverage, nir_u2u32(b, sample), nir_imm_int(b, 1));
cond = nir_iand(b, cond, nir_ine_imm(b, covered, 0));
} else if (samples != NULL) {
cond = nir_iand(b, cond, nir_ine_imm(b, samples, 0));
}
nir_push_if(b, cond);
@ -290,6 +301,13 @@ tib_impl(nir_builder *b, nir_instr *instr, void *data)
*(ctx->translucent) = true;
}
if (ctx->write_samples) {
assert(ctx->translucent != NULL &&
"sample masking requires translucency");
*(ctx->translucent) = true;
}
/* But we ignore the NIR write mask for that, since it's basically an
* optimization hint.
*/
@ -307,11 +325,11 @@ tib_impl(nir_builder *b, nir_instr *instr, void *data)
if (tib->spilled[rt]) {
store_memory(b, ctx->bindless_base, tib->nr_samples, logical_format,
rt, value);
rt, value, ctx->write_samples);
ctx->any_memory_stores = true;
} else {
store_tilebuffer(b, tib, format, logical_format, rt, value,
write_mask);
ctx->write_samples, write_mask);
}
return NIR_LOWER_INSTR_PROGRESS_REPLACE;
@ -338,7 +356,7 @@ tib_impl(nir_builder *b, nir_instr *instr, void *data)
bool
agx_nir_lower_tilebuffer(nir_shader *shader, struct agx_tilebuffer_layout *tib,
uint8_t *colormasks, unsigned *bindless_base,
bool *translucent)
nir_def *write_samples, bool *translucent)
{
assert(shader->info.stage == MESA_SHADER_FRAGMENT);
@ -346,6 +364,7 @@ agx_nir_lower_tilebuffer(nir_shader *shader, struct agx_tilebuffer_layout *tib,
.tib = tib,
.colormasks = colormasks,
.translucent = translucent,
.write_samples = write_samples,
};
/* Allocate 1 texture + 1 PBE descriptor for each spilled descriptor */

View file

@ -312,12 +312,16 @@ agx_nir_fs_epilog(nir_builder *b, const void *key_)
.src_type = nir_type_float | size);
}
/* Grab the sample ID early, this has to happen in the first block. */
nir_def *sample_id = NULL;
/* Grab registers early, this has to happen in the first block. */
nir_def *sample_id = NULL, *write_samples = NULL;
if (key->link.sample_shading) {
sample_id = nir_load_exported_agx(b, 1, 16, .base = 1);
}
if (key->link.sample_mask_after_force_early) {
write_samples = nir_load_exported_agx(b, 1, 16, .base = 7);
}
/* Now lower the resulting program using the key */
struct agx_tilebuffer_layout tib = agx_build_tilebuffer_layout(
key->rt_formats, ARRAY_SIZE(key->rt_formats), key->nr_samples, true);
@ -400,7 +404,7 @@ agx_nir_fs_epilog(nir_builder *b, const void *key_)
unsigned rt_spill = key->link.rt_spill_base;
NIR_PASS(_, b->shader, agx_nir_lower_tilebuffer, &tib, colormasks, &rt_spill,
&force_translucent);
write_samples, &force_translucent);
NIR_PASS(_, b->shader, agx_nir_lower_texture);
NIR_PASS(_, b->shader, agx_nir_lower_multisampled_image_store);
@ -441,10 +445,17 @@ agx_nir_fs_epilog(nir_builder *b, const void *key_)
b->shader->info.fs.uses_sample_shading = key->link.sample_shading;
}
/* State threaded through lower_output_to_epilog via nir_shader_intrinsics_pass. */
struct lower_epilog_ctx {
   /* Epilog link info being filled in (e.g. sample_mask_after_force_early). */
   struct agx_fs_epilog_link_info *info;

   /* Lazily-created local 16-bit variable accumulating the mask of samples
    * NOT discarded (starts at 0xFF, cleared per discard_agx). NULL until the
    * first discard with early fragment tests is encountered.
    */
   nir_variable *masked_samples;
};
static bool
lower_output_to_epilog(nir_builder *b, nir_intrinsic_instr *intr, void *data)
{
struct agx_fs_epilog_link_info *info = data;
struct lower_epilog_ctx *ctx = data;
struct agx_fs_epilog_link_info *info = ctx->info;
if (intr->intrinsic == nir_intrinsic_store_zs_agx) {
assert(nir_src_as_uint(intr->src[0]) == 0xff && "msaa not yet lowered");
b->cursor = nir_instr_remove(&intr->instr);
@ -464,6 +475,32 @@ lower_output_to_epilog(nir_builder *b, nir_intrinsic_instr *intr, void *data)
return true;
}
if (intr->intrinsic == nir_intrinsic_discard_agx &&
b->shader->info.fs.early_fragment_tests) {
if (!ctx->masked_samples) {
b->cursor = nir_before_impl(nir_shader_get_entrypoint(b->shader));
ctx->masked_samples =
nir_local_variable_create(b->impl, glsl_uint16_t_type(), NULL);
nir_store_var(b, ctx->masked_samples, nir_imm_intN_t(b, 0xFF, 16),
nir_component_mask(1));
}
b->cursor = nir_before_instr(&intr->instr);
nir_def *mask = nir_load_var(b, ctx->masked_samples);
nir_def *mask_2 =
nir_ixor(b, intr->src[0].ssa, nir_imm_intN_t(b, 0xff, 16));
mask = nir_iand(b, mask, mask_2);
nir_store_var(b, ctx->masked_samples, mask, nir_component_mask(1));
nir_instr_remove(&intr->instr);
return true;
}
if (intr->intrinsic != nir_intrinsic_store_output)
return false;
@ -525,9 +562,26 @@ bool
agx_nir_lower_fs_output_to_epilog(nir_shader *s,
struct agx_fs_epilog_link_info *out)
{
struct lower_epilog_ctx ctx = {.info = out};
nir_shader_intrinsics_pass(s, lower_output_to_epilog,
nir_metadata_dominance | nir_metadata_block_index,
out);
&ctx);
if (ctx.masked_samples) {
nir_builder b =
nir_builder_at(nir_after_impl(nir_shader_get_entrypoint(s)));
nir_export_agx(&b, nir_load_var(&b, ctx.masked_samples), .base = 7);
out->sample_mask_after_force_early = true;
bool progress;
do {
progress = false;
NIR_PASS(progress, s, nir_lower_vars_to_ssa);
NIR_PASS(progress, s, nir_opt_dce);
} while (progress);
}
out->sample_shading = s->info.fs.uses_sample_shading;
return true;

View file

@ -93,7 +93,7 @@ agx_build_tilebuffer_layout(const enum pipe_format *formats, uint8_t nr_cbufs,
bool agx_nir_lower_tilebuffer(struct nir_shader *shader,
struct agx_tilebuffer_layout *tib,
uint8_t *colormasks, unsigned *bindless_base,
bool *translucent);
struct nir_def *write_samples, bool *translucent);
bool agx_nir_lower_to_per_sample(struct nir_shader *shader);

View file

@ -1674,7 +1674,7 @@ agx_compile_variant(struct agx_device *dev, struct pipe_context *pctx,
unsigned rt_spill_base = BITSET_LAST_BIT(nir->info.textures_used) +
(2 * BITSET_LAST_BIT(nir->info.images_used));
unsigned rt_spill = rt_spill_base;
NIR_PASS(_, nir, agx_nir_lower_tilebuffer, &tib, NULL, &rt_spill,
NIR_PASS(_, nir, agx_nir_lower_tilebuffer, &tib, NULL, &rt_spill, NULL,
NULL);
}