agx: Fix discards

Switch our frontends from generating sample_mask_agx to generating discard_agx,
and switch the backend from legalizing sample_mask_agx to lowering discard_agx
to sample_mask_agx. This is a much easier problem, and it is solved here in a
way that is simple (and inefficient) but obviously correct.

This should fix corruption in Darwinia.

Signed-off-by: Alyssa Rosenzweig <alyssa@rosenzweig.io>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/23832>
Alyssa Rosenzweig 2023-06-14 12:34:34 -04:00 committed by Marge Bot
parent baf67144bd
commit b5fccfa197
5 changed files with 63 additions and 91 deletions
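For orientation, a minimal sketch of the lowering chain this commit sets up, using the NIR builder helpers that appear in the hunks below; the surrounding pass boilerplate (the builder b and the discard intrinsic intr) is assumed, so this is illustrative rather than literal driver code:

   /* Frontend: discard / discard_if become one intrinsic carrying the mask
    * of samples to kill. */
   nir_ssa_def *all_samples = nir_imm_intN_t(b, 0xFF, 16);
   nir_ssa_def *killed = all_samples;
   if (intr->intrinsic == nir_intrinsic_discard_if)
      killed = nir_bcsel(b, intr->src[0].ssa, all_samples,
                         nir_imm_intN_t(b, 0, 16));
   nir_discard_agx(b, killed);

   /* Backend, pessimistic path: run the depth/stencil test for the killed
    * samples with LIVE = 0, so they can never survive. Shaders that also
    * write depth/stencil instead get zs_emit with depth = NaN, since
    * sample_mask and zs_emit must not mix. */
   nir_sample_mask_agx(b, killed, nir_imm_intN_t(b, 0, 16));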

View file

@@ -6,6 +6,7 @@
 #include "compiler/nir/nir.h"
 #include "compiler/nir/nir_builder.h"
 #include "agx_compiler.h"
+#include "nir_builder_opcodes.h"
 
 #define ALL_SAMPLES 0xFF
 #define BASE_Z 1
@@ -86,15 +87,13 @@ lower_discard(nir_builder *b, nir_instr *instr, UNUSED void *data)
    nir_ssa_def *all_samples = nir_imm_intN_t(b, ALL_SAMPLES, 16);
    nir_ssa_def *no_samples = nir_imm_intN_t(b, 0, 16);
+   nir_ssa_def *killed_samples = all_samples;
 
    if (intr->intrinsic == nir_intrinsic_discard_if)
-      no_samples = nir_bcsel(b, intr->src[0].ssa, no_samples, all_samples);
+      killed_samples = nir_bcsel(b, intr->src[0].ssa, all_samples, no_samples);
 
-   /* This will get lowered later to zs_emit if needed */
-   nir_sample_mask_agx(b, all_samples, no_samples);
-   b->shader->info.fs.uses_discard = false;
-   b->shader->info.outputs_written |= BITFIELD64_BIT(FRAG_RESULT_SAMPLE_MASK);
+   /* This will get lowered later as needed */
+   nir_discard_agx(b, killed_samples);
 
    nir_instr_remove(instr);
    return true;
 }

View file

@@ -5,6 +5,7 @@
 #include "compiler/nir/nir_builder.h"
 #include "agx_compiler.h"
+#include "nir_intrinsics.h"
 
 /*
  * sample_mask takes two bitmasks as arguments, TARGET and LIVE. Each bit refers
@@ -51,7 +52,9 @@
  * 4. If zs_emit is used anywhere in the shader, sample_mask must not be used.
  *    Instead, zs_emit with depth = NaN can be emitted.
  *
- * This pass legalizes some sample_mask instructions to satisfy these rules.
+ * This pass lowers discard_agx to sample_mask instructions satisfying these
+ * rules. Other passes should not generate sample_mask instructions, as there
+ * are too many footguns.
  */
 
 #define ALL_SAMPLES (0xFF)
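As a reading aid, a tiny host-side model of the TARGET/LIVE semantics described above; it mirrors the target & ~live computation that the removed zs_emit path used, and it deliberately ignores the actual depth/stencil test result:

   #include <stdint.h>

   /* Samples outside TARGET are untouched; samples in TARGET survive only
    * if they are also set in LIVE (and pass the Z/S test, not modeled). */
   static uint8_t
   apply_sample_mask(uint8_t coverage, uint8_t target, uint8_t live)
   {
      uint8_t killed = target & ~live;
      return coverage & ~killed;
   }

Under this model, the pessimistic lowering of discard_agx(k) below behaves like apply_sample_mask(coverage, k, 0), killing exactly the samples in k.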
@@ -91,15 +94,11 @@ lower_sample_mask_to_zs(nir_builder *b, nir_instr *instr, UNUSED void *data)
       return true;
    }
 
-   if (intr->intrinsic != nir_intrinsic_sample_mask_agx)
+   if (intr->intrinsic != nir_intrinsic_discard_agx)
       return false;
 
-   nir_ssa_def *target = intr->src[0].ssa;
-   nir_ssa_def *live = intr->src[1].ssa;
-   nir_ssa_def *discard = nir_iand(b, target, nir_inot(b, live));
-
    /* Write a NaN depth value for discarded samples */
-   nir_store_zs_agx(b, discard, nir_imm_float(b, NAN),
+   nir_store_zs_agx(b, intr->src[0].ssa, nir_imm_float(b, NAN),
                     stencil_written ? nir_imm_intN_t(b, 0, 16)
                                     : nir_ssa_undef(b, 1, 16) /* stencil */,
                     .base = BASE_Z | (stencil_written ? BASE_S : 0));
@@ -108,11 +107,27 @@ lower_sample_mask_to_zs(nir_builder *b, nir_instr *instr, UNUSED void *data)
    return true;
 }
 
+static bool
+lower_discard_to_sample_mask_0(nir_builder *b, nir_instr *instr,
+                               UNUSED void *data)
+{
+   if (instr->type != nir_instr_type_intrinsic)
+      return false;
+
+   nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
+   if (intr->intrinsic != nir_intrinsic_discard_agx)
+      return false;
+
+   b->cursor = nir_before_instr(instr);
+   nir_sample_mask_agx(b, intr->src[0].ssa, nir_imm_intN_t(b, 0, 16));
+   nir_instr_remove(instr);
+   return true;
+}
+
 bool
 agx_nir_lower_sample_mask(nir_shader *shader, unsigned nr_samples)
 {
-   if (!(shader->info.outputs_written &
-         (BITFIELD64_BIT(FRAG_RESULT_SAMPLE_MASK))))
+   if (!shader->info.fs.uses_discard)
       return false;
 
    /* sample_mask can't be used with zs_emit, so lower sample_mask to zs_emit */
@@ -131,40 +146,19 @@ agx_nir_lower_sample_mask(nir_shader *shader, unsigned nr_samples)
       return true;
    }
 
-   /* nir_lower_io_to_temporaries ensures that stores are in the last block */
+   /* Pessimistic lowering: force late depth/stencil test. TODO: Optimize. */
+   nir_shader_instructions_pass(
+      shader, lower_discard_to_sample_mask_0,
+      nir_metadata_block_index | nir_metadata_dominance, NULL);
+
+   /* nir_lower_io_to_temporaries ensures that stores are in the last block. */
    nir_function_impl *impl = nir_shader_get_entrypoint(shader);
    nir_block *block = nir_impl_last_block(impl);
    nir_builder b;
    nir_builder_init(&b, impl);
 
-   /* Check which samples get a value written in the last block */
-   uint8_t samples_set = 0;
-
-   nir_foreach_instr(instr, block) {
-      if (instr->type != nir_instr_type_intrinsic)
-         continue;
-
-      nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
-      if (intr->intrinsic != nir_intrinsic_sample_mask_agx)
-         continue;
-
-      if (!nir_src_is_const(intr->src[0]))
-         continue;
-
-      samples_set |= nir_src_as_uint(intr->src[0]);
-   }
-
-   /* If all samples are set, we're good to go */
-   if ((samples_set & BITFIELD_MASK(nr_samples)) == BITFIELD_MASK(nr_samples))
-      return false;
-
-   /* Otherwise, at least one sample is not set in the last block and hence may
-    * not be set at all. Insert an instruction in the last block to ensure it
-    * will be live.
-    */
    b.cursor = nir_after_block(block);
 
    nir_foreach_instr(instr, block) {
       if (instr->type != nir_instr_type_intrinsic)
          continue;
@@ -177,6 +171,7 @@ agx_nir_lower_sample_mask(nir_shader *shader, unsigned nr_samples)
       break;
    }
 
+   /* Run depth/stencil tests for all remaining samples */
    nir_sample_mask_agx(&b, nir_imm_intN_t(&b, ALL_SAMPLES, 16),
                        nir_imm_intN_t(&b, ALL_SAMPLES, 16));
 
    return true;
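Concretely, for a shader with a single conditional discard, the pass now emits (a sketch, with k standing for the killed-sample mask):

   sample_mask_agx(k, 0x00);      /* kill exactly the discarded samples */
   /* ... rest of the shader ... */
   sample_mask_agx(0xFF, 0xFF);   /* last block: test all remaining samples */

Every sample ends up covered by some sample_mask, which is what the rules above require; the cost is that the depth/stencil test for surviving samples always runs late, hence the TODO.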

View file

@@ -70,8 +70,8 @@ agx_nir_lower_alpha_to_coverage(nir_shader *shader, uint8_t nr_samples)
       nir_iadd_imm(b, nir_ishl(b, nir_imm_intN_t(b, 1, 16), bits), -1);
 
    /* Discard samples that aren't covered */
-   nir_sample_mask_agx(b, nir_imm_intN_t(b, ALL_SAMPLES, 16), mask);
-   shader->info.outputs_written |= BITFIELD64_BIT(FRAG_RESULT_SAMPLE_MASK);
+   nir_discard_agx(b, nir_inot(b, mask));
+   shader->info.fs.uses_discard = true;
 }
 
 /*
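As an aside, a host-side model of the coverage mask built above, assuming bits holds the number of samples covered by the alpha value (that computation sits before this hunk and is not shown):

   #include <stdint.h>

   /* Mirrors nir_iadd_imm(nir_ishl(1, bits), -1) and the nir_inot above. */
   static uint16_t
   alpha_to_coverage_kill_mask(unsigned bits)
   {
      uint16_t covered = (uint16_t)((1u << bits) - 1u);
      return (uint16_t)~covered; /* samples to pass to discard_agx */
   }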

View file

@@ -31,7 +31,7 @@ lower_wrapped(nir_builder *b, nir_instr *instr, void *data)
    case nir_intrinsic_load_local_pixel_agx:
    case nir_intrinsic_store_local_pixel_agx:
    case nir_intrinsic_store_zs_agx:
-   case nir_intrinsic_sample_mask_agx: {
+   case nir_intrinsic_discard_agx: {
       /* Fragment I/O inside the loop should only affect one sample. */
       unsigned mask_index =
          (intr->intrinsic == nir_intrinsic_store_local_pixel_agx) ? 1 : 0;
@@ -110,26 +110,17 @@ lower_sample_mask_write(nir_builder *b, nir_instr *instr, void *data)
    nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
    b->cursor = nir_before_instr(instr);
 
-   nir_ssa_def *mask;
-   if (intr->intrinsic == nir_intrinsic_sample_mask_agx) {
-      /* For alpha-to-coverage */
-      assert(nir_src_as_uint(intr->src[0]) == ALL_SAMPLES && "not wrapped");
-      mask = intr->src[1].ssa;
-   } else if (intr->intrinsic == nir_intrinsic_store_output) {
-      nir_io_semantics sem = nir_intrinsic_io_semantics(intr);
-      if (sem.location != FRAG_RESULT_SAMPLE_MASK)
-         return false;
-
-      /* Sample mask writes are ignored unless multisampling is used. */
-      if (state->nr_samples == 1) {
-         nir_instr_remove(instr);
-         return true;
-      }
-
-      mask = nir_u2u16(b, intr->src[0].ssa);
-   } else {
+   if (intr->intrinsic != nir_intrinsic_store_output)
       return false;
-   }
+
+   nir_io_semantics sem = nir_intrinsic_io_semantics(intr);
+   if (sem.location != FRAG_RESULT_SAMPLE_MASK)
+      return false;
+
+   /* Sample mask writes are ignored unless multisampling is used. */
+   if (state->nr_samples == 1) {
+      nir_instr_remove(instr);
+      return true;
+   }
 
    /* The Vulkan spec says:
@@ -139,13 +130,9 @@ lower_sample_mask_write(nir_builder *b, nir_instr *instr, void *data)
     *    shader invocation are ignored.
     *
     * That will be satisfied by outputting gl_SampleMask for the whole pixel
-    * and then lowering sample shading after (splitting up sample_mask
-    * targets).
+    * and then lowering sample shading after (splitting up discard targets).
     */
-   if (state->api_sample_mask)
-      mask = nir_iand(b, mask, nir_load_api_sample_mask_agx(b));
-
-   nir_sample_mask_agx(b, nir_imm_intN_t(b, ALL_SAMPLES, 16), mask);
+   nir_discard_agx(b, nir_inot(b, nir_u2u16(b, intr->src[0].ssa)));
    nir_instr_remove(instr);
    return true;
 }
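For a concrete reading of this lowering: assuming 4x multisampling and a shader writing gl_SampleMask = 0b0101, the store becomes discard_agx(~0b0101), killing samples 1 and 3 (the extra high bits of the 16-bit mask are harmless for a 4-sample target). The old iand with the API sample mask is gone because insert_sample_mask_write below now emits its own discard for the API mask.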
@@ -179,20 +166,14 @@ lower_sample_mask_read(nir_builder *b, nir_instr *instr, UNUSED void *_)
 static void
 insert_sample_mask_write(nir_shader *s)
 {
-   /* nir_lower_io_to_temporaries ensures that stores are in the last block */
-   nir_function_impl *impl = nir_shader_get_entrypoint(s);
    nir_builder b;
+   nir_function_impl *impl = nir_shader_get_entrypoint(s);
    nir_builder_init(&b, impl);
+   b.cursor = nir_before_block(nir_start_block(impl));
 
-   /* Load the desired API sample mask */
-   nir_ssa_def *api_sample_mask = nir_load_api_sample_mask_agx(&b);
-
-   /* Kill samples that are not covered by the mask using the AGX instruction */
-   nir_ssa_def *all_samples = nir_imm_intN_t(&b, ALL_SAMPLES, 16);
-   nir_sample_mask_agx(&b, all_samples, api_sample_mask);
-   s->info.outputs_written |= BITFIELD64_BIT(FRAG_RESULT_SAMPLE_MASK);
+   /* Kill samples that are NOT covered by the mask */
+   nir_discard_agx(&b, nir_inot(&b, nir_load_api_sample_mask_agx(&b)));
+   s->info.fs.uses_discard = true;
 }
 
 /*
@@ -207,20 +188,17 @@ agx_nir_lower_monolithic_msaa(nir_shader *shader, struct agx_msaa_state *state)
    assert(state->nr_samples == 1 || state->nr_samples == 2 ||
           state->nr_samples == 4);
 
-   /* Lower gl_SampleMask writes */
-   if (shader->info.outputs_written & BITFIELD64_BIT(FRAG_RESULT_SAMPLE_MASK)) {
+   /* Sample mask writes need to be lowered. This includes an API sample mask
+    * lowering.
+    */
    nir_shader_instructions_pass(
       shader, lower_sample_mask_write,
       nir_metadata_block_index | nir_metadata_dominance, state);
-   } else if ((state->nr_samples > 1) && state->api_sample_mask) {
-      /* If there's no sample mask write, we need to add one of our own for the
-       * API-level sample masking to work.
-       */
-      insert_sample_mask_write(shader);
-   }
+
+   /* Lower API sample masks */
+   if ((state->nr_samples > 1) && state->api_sample_mask)
+      insert_sample_mask_write(shader);
 
    /* Additionally, sample_mask_in needs to account for the API-level mask */
    nir_shader_instructions_pass(
       shader, lower_sample_mask_read,

View file

@@ -102,9 +102,9 @@ lower_to_sample(nir_builder *b, nir_instr *instr, void *_)
  * The load_sample_id intrinsics themselves are lowered later, with different
  * lowerings for monolithic vs epilogs.
  *
- * Note that fragment I/O (like store_local_pixel_agx and sample_mask_agx) does
- * not get lowered here, because that lowering is different for monolithic vs
- * FS epilogs even though there's no dependency on sample count.
+ * Note that fragment I/O (like store_local_pixel_agx and discard_agx) does not
+ * get lowered here, because that lowering is different for monolithic vs FS
+ * epilogs even though there's no dependency on sample count.
  */
 bool
 agx_nir_lower_sample_intrinsics(nir_shader *shader)