mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-05 09:38:07 +02:00
agx: Fix discards
Switch our frontends from generating sample_mask_agx to generating discard_agx, and switch from legalizing sample_mask_agx to lowering discard_agx to sample_mask_agx. This is a much easier problem and is done here in a way that is simple (and inefficient) but obviously correct. This should fix corruption in Darwinia. Signed-off-by: Alyssa Rosenzweig <alyssa@rosenzweig.io> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/23832>
This commit is contained in:
parent
baf67144bd
commit
b5fccfa197
5 changed files with 63 additions and 91 deletions
|
|
@ -6,6 +6,7 @@
|
|||
#include "compiler/nir/nir.h"
|
||||
#include "compiler/nir/nir_builder.h"
|
||||
#include "agx_compiler.h"
|
||||
#include "nir_builder_opcodes.h"
|
||||
|
||||
#define ALL_SAMPLES 0xFF
|
||||
#define BASE_Z 1
|
||||
|
|
@ -86,15 +87,13 @@ lower_discard(nir_builder *b, nir_instr *instr, UNUSED void *data)
|
|||
|
||||
nir_ssa_def *all_samples = nir_imm_intN_t(b, ALL_SAMPLES, 16);
|
||||
nir_ssa_def *no_samples = nir_imm_intN_t(b, 0, 16);
|
||||
nir_ssa_def *killed_samples = all_samples;
|
||||
|
||||
if (intr->intrinsic == nir_intrinsic_discard_if)
|
||||
no_samples = nir_bcsel(b, intr->src[0].ssa, no_samples, all_samples);
|
||||
|
||||
/* This will get lowered later to zs_emit if needed */
|
||||
nir_sample_mask_agx(b, all_samples, no_samples);
|
||||
b->shader->info.fs.uses_discard = false;
|
||||
b->shader->info.outputs_written |= BITFIELD64_BIT(FRAG_RESULT_SAMPLE_MASK);
|
||||
killed_samples = nir_bcsel(b, intr->src[0].ssa, all_samples, no_samples);
|
||||
|
||||
/* This will get lowered later as needed */
|
||||
nir_discard_agx(b, killed_samples);
|
||||
nir_instr_remove(instr);
|
||||
return true;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -5,6 +5,7 @@
|
|||
|
||||
#include "compiler/nir/nir_builder.h"
|
||||
#include "agx_compiler.h"
|
||||
#include "nir_intrinsics.h"
|
||||
|
||||
/*
|
||||
* sample_mask takes two bitmasks as arguments, TARGET and LIVE. Each bit refers
|
||||
|
|
@ -51,7 +52,9 @@
|
|||
* 4. If zs_emit is used anywhere in the shader, sample_mask must not be used.
|
||||
* Instead, zs_emit with depth = NaN can be emitted.
|
||||
*
|
||||
* This pass legalizes some sample_mask instructions to satisfy these rules.
|
||||
* This pass lowers discard_agx to sample_mask instructions satisfying these
|
||||
* rules. Other passes should not generate sample_mask instructions, as there
|
||||
* are too many footguns.
|
||||
*/
|
||||
|
||||
#define ALL_SAMPLES (0xFF)
|
||||
|
|
@ -91,15 +94,11 @@ lower_sample_mask_to_zs(nir_builder *b, nir_instr *instr, UNUSED void *data)
|
|||
return true;
|
||||
}
|
||||
|
||||
if (intr->intrinsic != nir_intrinsic_sample_mask_agx)
|
||||
if (intr->intrinsic != nir_intrinsic_discard_agx)
|
||||
return false;
|
||||
|
||||
nir_ssa_def *target = intr->src[0].ssa;
|
||||
nir_ssa_def *live = intr->src[1].ssa;
|
||||
nir_ssa_def *discard = nir_iand(b, target, nir_inot(b, live));
|
||||
|
||||
/* Write a NaN depth value for discarded samples */
|
||||
nir_store_zs_agx(b, discard, nir_imm_float(b, NAN),
|
||||
nir_store_zs_agx(b, intr->src[0].ssa, nir_imm_float(b, NAN),
|
||||
stencil_written ? nir_imm_intN_t(b, 0, 16)
|
||||
: nir_ssa_undef(b, 1, 16) /* stencil */,
|
||||
.base = BASE_Z | (stencil_written ? BASE_S : 0));
|
||||
|
|
@ -108,11 +107,27 @@ lower_sample_mask_to_zs(nir_builder *b, nir_instr *instr, UNUSED void *data)
|
|||
return true;
|
||||
}
|
||||
|
||||
static bool
|
||||
lower_discard_to_sample_mask_0(nir_builder *b, nir_instr *instr,
|
||||
UNUSED void *data)
|
||||
{
|
||||
if (instr->type != nir_instr_type_intrinsic)
|
||||
return false;
|
||||
|
||||
nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
|
||||
if (intr->intrinsic != nir_intrinsic_discard_agx)
|
||||
return false;
|
||||
|
||||
b->cursor = nir_before_instr(instr);
|
||||
nir_sample_mask_agx(b, intr->src[0].ssa, nir_imm_intN_t(b, 0, 16));
|
||||
nir_instr_remove(instr);
|
||||
return true;
|
||||
}
|
||||
|
||||
bool
|
||||
agx_nir_lower_sample_mask(nir_shader *shader, unsigned nr_samples)
|
||||
{
|
||||
if (!(shader->info.outputs_written &
|
||||
(BITFIELD64_BIT(FRAG_RESULT_SAMPLE_MASK))))
|
||||
if (!shader->info.fs.uses_discard)
|
||||
return false;
|
||||
|
||||
/* sample_mask can't be used with zs_emit, so lower sample_mask to zs_emit */
|
||||
|
|
@ -131,40 +146,19 @@ agx_nir_lower_sample_mask(nir_shader *shader, unsigned nr_samples)
|
|||
return true;
|
||||
}
|
||||
|
||||
/* nir_lower_io_to_temporaries ensures that stores are in the last block */
|
||||
/* Pessimistic lowering: force late depth/stencil test. TODO: Optimize. */
|
||||
nir_shader_instructions_pass(
|
||||
shader, lower_discard_to_sample_mask_0,
|
||||
nir_metadata_block_index | nir_metadata_dominance, NULL);
|
||||
|
||||
/* nir_lower_io_to_temporaries ensures that stores are in the last block. */
|
||||
nir_function_impl *impl = nir_shader_get_entrypoint(shader);
|
||||
nir_block *block = nir_impl_last_block(impl);
|
||||
|
||||
nir_builder b;
|
||||
nir_builder_init(&b, impl);
|
||||
|
||||
/* Check which samples get a value written in the last block */
|
||||
uint8_t samples_set = 0;
|
||||
|
||||
nir_foreach_instr(instr, block) {
|
||||
if (instr->type != nir_instr_type_intrinsic)
|
||||
continue;
|
||||
|
||||
nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
|
||||
if (intr->intrinsic != nir_intrinsic_sample_mask_agx)
|
||||
continue;
|
||||
|
||||
if (!nir_src_is_const(intr->src[0]))
|
||||
continue;
|
||||
|
||||
samples_set |= nir_src_as_uint(intr->src[0]);
|
||||
}
|
||||
|
||||
/* If all samples are set, we're good to go */
|
||||
if ((samples_set & BITFIELD_MASK(nr_samples)) == BITFIELD_MASK(nr_samples))
|
||||
return false;
|
||||
|
||||
/* Otherwise, at least one sample is not set in the last block and hence may
|
||||
* not be set at all. Insert an instruction in the last block to ensure it
|
||||
* will be live.
|
||||
*/
|
||||
b.cursor = nir_after_block(block);
|
||||
|
||||
nir_foreach_instr(instr, block) {
|
||||
if (instr->type != nir_instr_type_intrinsic)
|
||||
continue;
|
||||
|
|
@ -177,6 +171,7 @@ agx_nir_lower_sample_mask(nir_shader *shader, unsigned nr_samples)
|
|||
break;
|
||||
}
|
||||
|
||||
/* Run depth/stencil tests for all remaining samples */
|
||||
nir_sample_mask_agx(&b, nir_imm_intN_t(&b, ALL_SAMPLES, 16),
|
||||
nir_imm_intN_t(&b, ALL_SAMPLES, 16));
|
||||
return true;
|
||||
|
|
|
|||
|
|
@ -70,8 +70,8 @@ agx_nir_lower_alpha_to_coverage(nir_shader *shader, uint8_t nr_samples)
|
|||
nir_iadd_imm(b, nir_ishl(b, nir_imm_intN_t(b, 1, 16), bits), -1);
|
||||
|
||||
/* Discard samples that aren't covered */
|
||||
nir_sample_mask_agx(b, nir_imm_intN_t(b, ALL_SAMPLES, 16), mask);
|
||||
shader->info.outputs_written |= BITFIELD64_BIT(FRAG_RESULT_SAMPLE_MASK);
|
||||
nir_discard_agx(b, nir_inot(b, mask));
|
||||
shader->info.fs.uses_discard = true;
|
||||
}
|
||||
|
||||
/*
|
||||
|
|
|
|||
|
|
@ -31,7 +31,7 @@ lower_wrapped(nir_builder *b, nir_instr *instr, void *data)
|
|||
case nir_intrinsic_load_local_pixel_agx:
|
||||
case nir_intrinsic_store_local_pixel_agx:
|
||||
case nir_intrinsic_store_zs_agx:
|
||||
case nir_intrinsic_sample_mask_agx: {
|
||||
case nir_intrinsic_discard_agx: {
|
||||
/* Fragment I/O inside the loop should only affect one sample. */
|
||||
unsigned mask_index =
|
||||
(intr->intrinsic == nir_intrinsic_store_local_pixel_agx) ? 1 : 0;
|
||||
|
|
@ -110,26 +110,17 @@ lower_sample_mask_write(nir_builder *b, nir_instr *instr, void *data)
|
|||
|
||||
nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
|
||||
b->cursor = nir_before_instr(instr);
|
||||
|
||||
nir_ssa_def *mask;
|
||||
if (intr->intrinsic == nir_intrinsic_sample_mask_agx) {
|
||||
/* For alpha-to-coverage */
|
||||
assert(nir_src_as_uint(intr->src[0]) == ALL_SAMPLES && "not wrapped");
|
||||
mask = intr->src[1].ssa;
|
||||
} else if (intr->intrinsic == nir_intrinsic_store_output) {
|
||||
nir_io_semantics sem = nir_intrinsic_io_semantics(intr);
|
||||
if (sem.location != FRAG_RESULT_SAMPLE_MASK)
|
||||
return false;
|
||||
|
||||
/* Sample mask writes are ignored unless multisampling is used. */
|
||||
if (state->nr_samples == 1) {
|
||||
nir_instr_remove(instr);
|
||||
return true;
|
||||
}
|
||||
|
||||
mask = nir_u2u16(b, intr->src[0].ssa);
|
||||
} else {
|
||||
if (intr->intrinsic != nir_intrinsic_store_output)
|
||||
return false;
|
||||
|
||||
nir_io_semantics sem = nir_intrinsic_io_semantics(intr);
|
||||
if (sem.location != FRAG_RESULT_SAMPLE_MASK)
|
||||
return false;
|
||||
|
||||
/* Sample mask writes are ignored unless multisampling is used. */
|
||||
if (state->nr_samples == 1) {
|
||||
nir_instr_remove(instr);
|
||||
return true;
|
||||
}
|
||||
|
||||
/* The Vulkan spec says:
|
||||
|
|
@ -139,13 +130,9 @@ lower_sample_mask_write(nir_builder *b, nir_instr *instr, void *data)
|
|||
* shader invocation are ignored.
|
||||
*
|
||||
* That will be satisfied by outputting gl_SampleMask for the whole pixel
|
||||
* and then lowering sample shading after (splitting up sample_mask
|
||||
* targets).
|
||||
* and then lowering sample shading after (splitting up discard targets).
|
||||
*/
|
||||
if (state->api_sample_mask)
|
||||
mask = nir_iand(b, mask, nir_load_api_sample_mask_agx(b));
|
||||
|
||||
nir_sample_mask_agx(b, nir_imm_intN_t(b, ALL_SAMPLES, 16), mask);
|
||||
nir_discard_agx(b, nir_inot(b, nir_u2u16(b, intr->src[0].ssa)));
|
||||
nir_instr_remove(instr);
|
||||
return true;
|
||||
}
|
||||
|
|
@ -179,20 +166,14 @@ lower_sample_mask_read(nir_builder *b, nir_instr *instr, UNUSED void *_)
|
|||
static void
|
||||
insert_sample_mask_write(nir_shader *s)
|
||||
{
|
||||
/* nir_lower_io_to_temporaries ensures that stores are in the last block */
|
||||
nir_function_impl *impl = nir_shader_get_entrypoint(s);
|
||||
|
||||
nir_builder b;
|
||||
nir_function_impl *impl = nir_shader_get_entrypoint(s);
|
||||
nir_builder_init(&b, impl);
|
||||
b.cursor = nir_before_block(nir_start_block(impl));
|
||||
|
||||
/* Load the desired API sample mask */
|
||||
nir_ssa_def *api_sample_mask = nir_load_api_sample_mask_agx(&b);
|
||||
|
||||
/* Kill samples that are not covered by the mask using the AGX instruction */
|
||||
nir_ssa_def *all_samples = nir_imm_intN_t(&b, ALL_SAMPLES, 16);
|
||||
nir_sample_mask_agx(&b, all_samples, api_sample_mask);
|
||||
s->info.outputs_written |= BITFIELD64_BIT(FRAG_RESULT_SAMPLE_MASK);
|
||||
/* Kill samples that are NOT covered by the mask */
|
||||
nir_discard_agx(&b, nir_inot(&b, nir_load_api_sample_mask_agx(&b)));
|
||||
s->info.fs.uses_discard = true;
|
||||
}
|
||||
|
||||
/*
|
||||
|
|
@ -207,20 +188,17 @@ agx_nir_lower_monolithic_msaa(nir_shader *shader, struct agx_msaa_state *state)
|
|||
assert(state->nr_samples == 1 || state->nr_samples == 2 ||
|
||||
state->nr_samples == 4);
|
||||
|
||||
/* Lower gl_SampleMask writes */
|
||||
if (shader->info.outputs_written & BITFIELD64_BIT(FRAG_RESULT_SAMPLE_MASK)) {
|
||||
/* Sample mask writes need to be lowered. This includes an API sample mask
|
||||
* lowering.
|
||||
*/
|
||||
nir_shader_instructions_pass(
|
||||
shader, lower_sample_mask_write,
|
||||
nir_metadata_block_index | nir_metadata_dominance, state);
|
||||
} else if ((state->nr_samples > 1) && state->api_sample_mask) {
|
||||
/* If there's no sample mask write, we need to add one of our own for the
|
||||
* API-level sample masking to work.
|
||||
*/
|
||||
insert_sample_mask_write(shader);
|
||||
}
|
||||
|
||||
/* Lower API sample masks */
|
||||
if ((state->nr_samples > 1) && state->api_sample_mask)
|
||||
insert_sample_mask_write(shader);
|
||||
|
||||
/* Additionally, sample_mask_in needs to account for the API-level mask */
|
||||
nir_shader_instructions_pass(
|
||||
shader, lower_sample_mask_read,
|
||||
|
|
|
|||
|
|
@ -102,9 +102,9 @@ lower_to_sample(nir_builder *b, nir_instr *instr, void *_)
|
|||
* The load_sample_id intrinsics themselves are lowered later, with different
|
||||
* lowerings for monolithic vs epilogs.
|
||||
*
|
||||
* Note that fragment I/O (like store_local_pixel_agx and sample_mask_agx) does
|
||||
* not get lowered here, because that lowering is different for monolithic vs
|
||||
* FS epilogs even though there's no dependency on sample count.
|
||||
* Note that fragment I/O (like store_local_pixel_agx and discard_agx) does not
|
||||
* get lowered here, because that lowering is different for monolithic vs FS
|
||||
* epilogs even though there's no dependency on sample count.
|
||||
*/
|
||||
bool
|
||||
agx_nir_lower_sample_intrinsics(nir_shader *shader)
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue