broadcom/compiler: fix early fragment tests setup

When early fragment tests are mandated by the shader, we must use
the Z value produced by the FEP even if there are elements that
would typically require late fragment tests (such as discards,
sample to coverage, etc).

This change means we also need to be a bit more careful when
we promote shaders to use early fragment tests so we don't
promote anything with discards for example.

Fixes:
dEQP-VK.fragment_operations.early_fragment.discard_early_fragment_tests_depth
dEQP-VK.fragment_operations.early_fragment.discard_early_fragment_tests_stencil

Reviewed-by: Alejandro Piñeiro <apinheiro@igalia.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/13837>
This commit is contained in:
Iago Toral Quiroga 2021-11-17 13:55:48 +01:00 committed by Marge Bot
parent 68ec867181
commit 5e536c97a9

View file

@ -1670,15 +1670,6 @@ vir_emit_tlb_color_write(struct v3d_compile *c, unsigned rt)
static void
emit_frag_end(struct v3d_compile *c)
{
/* If the shader has no non-TLB side effects and doesn't write Z
* we can promote it to enabling early_fragment_tests even
* if the user didn't.
*/
if (c->output_position_index == -1 &&
!(c->s->info.num_images || c->s->info.num_ssbos)) {
c->s->info.fs.early_fragment_tests = true;
}
if (c->output_sample_mask_index != -1) {
vir_SETMSF_dest(c, vir_nop_reg(),
vir_AND(c,
@ -1703,55 +1694,69 @@ emit_frag_end(struct v3d_compile *c)
}
struct qreg tlbu_reg = vir_magic_reg(V3D_QPU_WADDR_TLBU);
if (c->output_position_index != -1 &&
!c->s->info.fs.early_fragment_tests) {
struct qinst *inst = vir_MOV_dest(c, tlbu_reg,
c->outputs[c->output_position_index]);
uint8_t tlb_specifier = TLB_TYPE_DEPTH;
if (c->devinfo->ver >= 42) {
tlb_specifier |= (TLB_V42_DEPTH_TYPE_PER_PIXEL |
TLB_SAMPLE_MODE_PER_PIXEL);
} else
tlb_specifier |= TLB_DEPTH_TYPE_PER_PIXEL;
/* If the shader has no non-TLB side effects and doesn't write Z
* we can promote it to enabling early_fragment_tests even
* if the user didn't.
*/
if (c->output_position_index == -1 &&
!(c->s->info.num_images || c->s->info.num_ssbos) &&
!c->s->info.fs.uses_discard &&
!c->fs_key->sample_alpha_to_coverage &&
has_any_tlb_color_write) {
c->s->info.fs.early_fragment_tests = true;
}
/* By default, the FEP will perform early fragment tests. This can be
* disabled when needed (Z writes from shader, discards, etc), by
* setting c->writes_z to True, in which case, the QPUs need to write
* the Z value.
*/
if (!c->s->info.fs.early_fragment_tests) {
c->writes_z = true;
uint8_t tlb_specifier = TLB_TYPE_DEPTH;
struct qinst *inst;
if (c->output_position_index != -1) {
/* Shader writes to gl_FragDepth, use that */
inst = vir_MOV_dest(c, tlbu_reg,
c->outputs[c->output_position_index]);
if (c->devinfo->ver >= 42) {
tlb_specifier |= (TLB_V42_DEPTH_TYPE_PER_PIXEL |
TLB_SAMPLE_MODE_PER_PIXEL);
} else {
tlb_specifier |= TLB_DEPTH_TYPE_PER_PIXEL;
}
} else {
/* Shader doesn't write to gl_FragDepth, take Z from
* FEP.
*/
inst = vir_MOV_dest(c, tlbu_reg, vir_nop_reg());
if (c->devinfo->ver >= 42) {
/* The spec says the PER_PIXEL flag is ignored
* for invariant writes, but the simulator
* demands it.
*/
tlb_specifier |= (TLB_V42_DEPTH_TYPE_INVARIANT |
TLB_SAMPLE_MODE_PER_PIXEL);
} else {
tlb_specifier |= TLB_DEPTH_TYPE_INVARIANT;
}
/* Since (single-threaded) fragment shaders always need
* a TLB write, if we dond't have any we emit a
* passthrouh Z and flag us as potentially discarding,
* so that we can use Z as the required TLB write.
*/
if (!has_any_tlb_color_write)
c->s->info.fs.uses_discard = true;
}
inst->uniform = vir_get_uniform_index(c, QUNIFORM_CONSTANT,
tlb_specifier |
0xffffff00);
c->writes_z = true;
} else if (c->s->info.fs.uses_discard ||
!c->s->info.fs.early_fragment_tests ||
c->fs_key->sample_alpha_to_coverage ||
!has_any_tlb_color_write) {
/* Emit passthrough Z if it needed to be delayed until shader
* end due to potential discards.
*
* Since (single-threaded) fragment shaders always need a TLB
* write, emit passthrouh Z if we didn't have any color
* buffers and flag us as potentially discarding, so that we
* can use Z as the TLB write.
*/
c->s->info.fs.uses_discard = true;
struct qinst *inst = vir_MOV_dest(c, tlbu_reg,
vir_nop_reg());
uint8_t tlb_specifier = TLB_TYPE_DEPTH;
if (c->devinfo->ver >= 42) {
/* The spec says the PER_PIXEL flag is ignored for
* invariant writes, but the simulator demands it.
*/
tlb_specifier |= (TLB_V42_DEPTH_TYPE_INVARIANT |
TLB_SAMPLE_MODE_PER_PIXEL);
} else {
tlb_specifier |= TLB_DEPTH_TYPE_INVARIANT;
}
inst->uniform = vir_get_uniform_index(c,
QUNIFORM_CONSTANT,
tlb_specifier |
0xffffff00);
c->writes_z = true;
}
/* XXX: Performance improvement: Merge Z write and color writes TLB