mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-04-26 09:00:37 +02:00
agx: handle discard with force early tests
We need to predicate the store, since we can't do a hardware demote after the tests have run. This is similar to what the blob does. Signed-off-by: Alyssa Rosenzweig <alyssa@rosenzweig.io> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/29607>
This commit is contained in:
parent
1dfb461552
commit
65e64b6e2d
6 changed files with 91 additions and 16 deletions
|
|
@ -71,6 +71,8 @@ the fragment epilog):
|
|||
depth and/or stencil are written by the fragment shader. Depth/stencil writes
|
||||
must be deferred to the epilog for correctness when the epilog can discard
|
||||
(i.e. when alpha-to-coverage is enabled).
|
||||
* `r3h` contains the logically emitted sample mask, if the fragment shader uses
|
||||
forced early tests. This predicates the epilog's stores.
|
||||
* The vec4 of 32-bit registers beginning at `r(4 * (i + 1))` contains the colour
|
||||
output for render target `i`. When dual source blending is enabled, there is
|
||||
only a single render target and the dual source colour is treated as the
|
||||
|
|
|
|||
|
|
@ -35,7 +35,7 @@ agx_compile_bg_eot_shader(struct agx_bg_eot_cache *cache, nir_shader *shader,
|
|||
agx_preprocess_nir(shader, cache->dev->libagx);
|
||||
if (tib) {
|
||||
unsigned bindless_base = 0;
|
||||
agx_nir_lower_tilebuffer(shader, tib, NULL, &bindless_base, NULL);
|
||||
agx_nir_lower_tilebuffer(shader, tib, NULL, &bindless_base, NULL, NULL);
|
||||
agx_nir_lower_monolithic_msaa(shader, tib->nr_samples);
|
||||
agx_nir_lower_multisampled_image_store(shader);
|
||||
agx_nir_lower_texture(shader);
|
||||
|
|
|
|||
|
|
@ -25,6 +25,7 @@ struct ctx {
|
|||
unsigned bindless_base;
|
||||
bool any_memory_stores;
|
||||
uint8_t outputs_written;
|
||||
nir_def *write_samples;
|
||||
};
|
||||
|
||||
static bool
|
||||
|
|
@ -46,7 +47,8 @@ tib_filter(const nir_instr *instr, UNUSED const void *_)
|
|||
static void
|
||||
store_tilebuffer(nir_builder *b, struct agx_tilebuffer_layout *tib,
|
||||
enum pipe_format format, enum pipe_format logical_format,
|
||||
unsigned rt, nir_def *value, unsigned write_mask)
|
||||
unsigned rt, nir_def *value, nir_def *samples,
|
||||
unsigned write_mask)
|
||||
{
|
||||
/* The hardware cannot extend for a 32-bit format. Extend ourselves. */
|
||||
if (format == PIPE_FORMAT_R32_UINT && value->bit_size == 16) {
|
||||
|
|
@ -84,10 +86,12 @@ store_tilebuffer(nir_builder *b, struct agx_tilebuffer_layout *tib,
|
|||
value = nir_u2u16(b, value);
|
||||
}
|
||||
|
||||
if (!samples)
|
||||
samples = nir_imm_intN_t(b, ALL_SAMPLES, 16);
|
||||
|
||||
uint8_t offset_B = agx_tilebuffer_offset_B(tib, rt);
|
||||
nir_store_local_pixel_agx(b, value, nir_imm_intN_t(b, ALL_SAMPLES, 16),
|
||||
.base = offset_B, .write_mask = write_mask,
|
||||
.format = format);
|
||||
nir_store_local_pixel_agx(b, value, samples, .base = offset_B,
|
||||
.write_mask = write_mask, .format = format);
|
||||
}
|
||||
|
||||
static nir_def *
|
||||
|
|
@ -181,7 +185,8 @@ image_coords(nir_builder *b)
|
|||
|
||||
static void
|
||||
store_memory(nir_builder *b, unsigned bindless_base, unsigned nr_samples,
|
||||
enum pipe_format format, unsigned rt, nir_def *value)
|
||||
enum pipe_format format, unsigned rt, nir_def *value,
|
||||
nir_def *samples)
|
||||
{
|
||||
nir_def *image = handle_for_rt(b, bindless_base, rt, true);
|
||||
nir_def *tex_image = handle_for_rt(b, bindless_base, rt, false);
|
||||
|
|
@ -216,10 +221,16 @@ store_memory(nir_builder *b, unsigned bindless_base, unsigned nr_samples,
|
|||
|
||||
if (nr_samples > 1) {
|
||||
nir_def *coverage = nir_load_sample_mask(b);
|
||||
|
||||
if (samples != NULL)
|
||||
coverage = nir_iand(b, coverage, samples);
|
||||
|
||||
nir_def *covered = nir_ubitfield_extract(
|
||||
b, coverage, nir_u2u32(b, sample), nir_imm_int(b, 1));
|
||||
|
||||
cond = nir_iand(b, cond, nir_ine_imm(b, covered, 0));
|
||||
} else if (samples != NULL) {
|
||||
cond = nir_iand(b, cond, nir_ine_imm(b, samples, 0));
|
||||
}
|
||||
|
||||
nir_push_if(b, cond);
|
||||
|
|
@ -290,6 +301,13 @@ tib_impl(nir_builder *b, nir_instr *instr, void *data)
|
|||
*(ctx->translucent) = true;
|
||||
}
|
||||
|
||||
if (ctx->write_samples) {
|
||||
assert(ctx->translucent != NULL &&
|
||||
"sample masking requires translucency");
|
||||
|
||||
*(ctx->translucent) = true;
|
||||
}
|
||||
|
||||
/* But we ignore the NIR write mask for that, since it's basically an
|
||||
* optimization hint.
|
||||
*/
|
||||
|
|
@ -307,11 +325,11 @@ tib_impl(nir_builder *b, nir_instr *instr, void *data)
|
|||
|
||||
if (tib->spilled[rt]) {
|
||||
store_memory(b, ctx->bindless_base, tib->nr_samples, logical_format,
|
||||
rt, value);
|
||||
rt, value, ctx->write_samples);
|
||||
ctx->any_memory_stores = true;
|
||||
} else {
|
||||
store_tilebuffer(b, tib, format, logical_format, rt, value,
|
||||
write_mask);
|
||||
ctx->write_samples, write_mask);
|
||||
}
|
||||
|
||||
return NIR_LOWER_INSTR_PROGRESS_REPLACE;
|
||||
|
|
@ -338,7 +356,7 @@ tib_impl(nir_builder *b, nir_instr *instr, void *data)
|
|||
bool
|
||||
agx_nir_lower_tilebuffer(nir_shader *shader, struct agx_tilebuffer_layout *tib,
|
||||
uint8_t *colormasks, unsigned *bindless_base,
|
||||
bool *translucent)
|
||||
nir_def *write_samples, bool *translucent)
|
||||
{
|
||||
assert(shader->info.stage == MESA_SHADER_FRAGMENT);
|
||||
|
||||
|
|
@ -346,6 +364,7 @@ agx_nir_lower_tilebuffer(nir_shader *shader, struct agx_tilebuffer_layout *tib,
|
|||
.tib = tib,
|
||||
.colormasks = colormasks,
|
||||
.translucent = translucent,
|
||||
.write_samples = write_samples,
|
||||
};
|
||||
|
||||
/* Allocate 1 texture + 1 PBE descriptor for each spilled descriptor */
|
||||
|
|
|
|||
|
|
@ -312,12 +312,16 @@ agx_nir_fs_epilog(nir_builder *b, const void *key_)
|
|||
.src_type = nir_type_float | size);
|
||||
}
|
||||
|
||||
/* Grab the sample ID early, this has to happen in the first block. */
|
||||
nir_def *sample_id = NULL;
|
||||
/* Grab registers early, this has to happen in the first block. */
|
||||
nir_def *sample_id = NULL, *write_samples = NULL;
|
||||
if (key->link.sample_shading) {
|
||||
sample_id = nir_load_exported_agx(b, 1, 16, .base = 1);
|
||||
}
|
||||
|
||||
if (key->link.sample_mask_after_force_early) {
|
||||
write_samples = nir_load_exported_agx(b, 1, 16, .base = 7);
|
||||
}
|
||||
|
||||
/* Now lower the resulting program using the key */
|
||||
struct agx_tilebuffer_layout tib = agx_build_tilebuffer_layout(
|
||||
key->rt_formats, ARRAY_SIZE(key->rt_formats), key->nr_samples, true);
|
||||
|
|
@ -400,7 +404,7 @@ agx_nir_fs_epilog(nir_builder *b, const void *key_)
|
|||
|
||||
unsigned rt_spill = key->link.rt_spill_base;
|
||||
NIR_PASS(_, b->shader, agx_nir_lower_tilebuffer, &tib, colormasks, &rt_spill,
|
||||
&force_translucent);
|
||||
write_samples, &force_translucent);
|
||||
NIR_PASS(_, b->shader, agx_nir_lower_texture);
|
||||
NIR_PASS(_, b->shader, agx_nir_lower_multisampled_image_store);
|
||||
|
||||
|
|
@ -441,10 +445,17 @@ agx_nir_fs_epilog(nir_builder *b, const void *key_)
|
|||
b->shader->info.fs.uses_sample_shading = key->link.sample_shading;
|
||||
}
|
||||
|
||||
/* State handed to lower_output_to_epilog through the pass's data pointer. */
struct lower_epilog_ctx {
|
||||
/* Epilog link info; set from the `out` argument of
 * agx_nir_lower_fs_output_to_epilog.
 */
struct agx_fs_epilog_link_info *info;
|
||||
/* 16-bit local variable tracking which samples remain after discards when
 * the shader uses early fragment tests. NULL until the first such discard is
 * lowered; it is then created, initialised to 0xFF, and each discard's mask
 * is cleared from it. When non-NULL after the pass, its value is exported at
 * base 7.
 */
nir_variable *masked_samples;
|
||||
};
|
||||
|
||||
static bool
|
||||
lower_output_to_epilog(nir_builder *b, nir_intrinsic_instr *intr, void *data)
|
||||
{
|
||||
struct agx_fs_epilog_link_info *info = data;
|
||||
struct lower_epilog_ctx *ctx = data;
|
||||
struct agx_fs_epilog_link_info *info = ctx->info;
|
||||
|
||||
if (intr->intrinsic == nir_intrinsic_store_zs_agx) {
|
||||
assert(nir_src_as_uint(intr->src[0]) == 0xff && "msaa not yet lowered");
|
||||
b->cursor = nir_instr_remove(&intr->instr);
|
||||
|
|
@ -464,6 +475,32 @@ lower_output_to_epilog(nir_builder *b, nir_intrinsic_instr *intr, void *data)
|
|||
return true;
|
||||
}
|
||||
|
||||
if (intr->intrinsic == nir_intrinsic_discard_agx &&
|
||||
b->shader->info.fs.early_fragment_tests) {
|
||||
|
||||
if (!ctx->masked_samples) {
|
||||
b->cursor = nir_before_impl(nir_shader_get_entrypoint(b->shader));
|
||||
|
||||
ctx->masked_samples =
|
||||
nir_local_variable_create(b->impl, glsl_uint16_t_type(), NULL);
|
||||
|
||||
nir_store_var(b, ctx->masked_samples, nir_imm_intN_t(b, 0xFF, 16),
|
||||
nir_component_mask(1));
|
||||
}
|
||||
|
||||
b->cursor = nir_before_instr(&intr->instr);
|
||||
|
||||
nir_def *mask = nir_load_var(b, ctx->masked_samples);
|
||||
nir_def *mask_2 =
|
||||
nir_ixor(b, intr->src[0].ssa, nir_imm_intN_t(b, 0xff, 16));
|
||||
|
||||
mask = nir_iand(b, mask, mask_2);
|
||||
nir_store_var(b, ctx->masked_samples, mask, nir_component_mask(1));
|
||||
|
||||
nir_instr_remove(&intr->instr);
|
||||
return true;
|
||||
}
|
||||
|
||||
if (intr->intrinsic != nir_intrinsic_store_output)
|
||||
return false;
|
||||
|
||||
|
|
@ -525,9 +562,26 @@ bool
|
|||
agx_nir_lower_fs_output_to_epilog(nir_shader *s,
|
||||
struct agx_fs_epilog_link_info *out)
|
||||
{
|
||||
struct lower_epilog_ctx ctx = {.info = out};
|
||||
|
||||
nir_shader_intrinsics_pass(s, lower_output_to_epilog,
|
||||
nir_metadata_dominance | nir_metadata_block_index,
|
||||
out);
|
||||
&ctx);
|
||||
|
||||
if (ctx.masked_samples) {
|
||||
nir_builder b =
|
||||
nir_builder_at(nir_after_impl(nir_shader_get_entrypoint(s)));
|
||||
|
||||
nir_export_agx(&b, nir_load_var(&b, ctx.masked_samples), .base = 7);
|
||||
out->sample_mask_after_force_early = true;
|
||||
|
||||
bool progress;
|
||||
do {
|
||||
progress = false;
|
||||
NIR_PASS(progress, s, nir_lower_vars_to_ssa);
|
||||
NIR_PASS(progress, s, nir_opt_dce);
|
||||
} while (progress);
|
||||
}
|
||||
|
||||
out->sample_shading = s->info.fs.uses_sample_shading;
|
||||
return true;
|
||||
|
|
|
|||
|
|
@ -93,7 +93,7 @@ agx_build_tilebuffer_layout(const enum pipe_format *formats, uint8_t nr_cbufs,
|
|||
bool agx_nir_lower_tilebuffer(struct nir_shader *shader,
|
||||
struct agx_tilebuffer_layout *tib,
|
||||
uint8_t *colormasks, unsigned *bindless_base,
|
||||
bool *translucent);
|
||||
struct nir_def *write_samples, bool *translucent);
|
||||
|
||||
bool agx_nir_lower_to_per_sample(struct nir_shader *shader);
|
||||
|
||||
|
|
|
|||
|
|
@ -1674,7 +1674,7 @@ agx_compile_variant(struct agx_device *dev, struct pipe_context *pctx,
|
|||
unsigned rt_spill_base = BITSET_LAST_BIT(nir->info.textures_used) +
|
||||
(2 * BITSET_LAST_BIT(nir->info.images_used));
|
||||
unsigned rt_spill = rt_spill_base;
|
||||
NIR_PASS(_, nir, agx_nir_lower_tilebuffer, &tib, NULL, &rt_spill,
|
||||
NIR_PASS(_, nir, agx_nir_lower_tilebuffer, &tib, NULL, &rt_spill, NULL,
|
||||
NULL);
|
||||
}
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue