mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-24 17:30:12 +01:00
pan/earlyzs: Support the shader ZS read-only case and its optimization on v10+
We are about to allow ZS tile buffer reads in panvk in order to support VK_KHR_dynamic_rendering_local_read, and this requires dealing with a new case in the early ZS logic.

Signed-off-by: Boris Brezillon <boris.brezillon@collabora.com>
Reviewed-by: Lars-Ivar Hesselberg Simonsen <lars-ivar.simonsen@arm.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/32540>
This commit is contained in:
parent
d2cd5ca609
commit
fe21da08ed
10 changed files with 84 additions and 35 deletions
|
|
@ -508,7 +508,7 @@ panfrost_prepare_fs_state(struct panfrost_context *ctx, uint64_t *blend_shaders,
|
|||
struct pan_earlyzs_state earlyzs = pan_earlyzs_get(
|
||||
fs->earlyzs, ctx->depth_stencil->writes_zs || has_oq,
|
||||
ctx->blend->base.alpha_to_coverage,
|
||||
ctx->depth_stencil->zs_always_passes);
|
||||
ctx->depth_stencil->zs_always_passes, false);
|
||||
|
||||
cfg.properties.pixel_kill_operation = earlyzs.kill;
|
||||
cfg.properties.zs_update_operation = earlyzs.update;
|
||||
|
|
|
|||
|
|
@ -1208,7 +1208,7 @@ csf_emit_draw_state(struct panfrost_batch *batch,
|
|||
struct pan_earlyzs_state earlyzs = pan_earlyzs_get(
|
||||
fs->earlyzs, ctx->depth_stencil->writes_zs || has_oq,
|
||||
ctx->blend->base.alpha_to_coverage,
|
||||
ctx->depth_stencil->zs_always_passes);
|
||||
ctx->depth_stencil->zs_always_passes, false);
|
||||
|
||||
cfg.pixel_kill_operation = (enum mali_pixel_kill)earlyzs.kill;
|
||||
cfg.zs_update_operation = (enum mali_pixel_kill)earlyzs.update;
|
||||
|
|
|
|||
|
|
@ -581,7 +581,7 @@ jm_emit_tiler_draw(struct mali_draw_packed *out, struct panfrost_batch *batch,
|
|||
struct pan_earlyzs_state earlyzs = pan_earlyzs_get(
|
||||
fs->earlyzs, ctx->depth_stencil->writes_zs || has_oq,
|
||||
ctx->blend->base.alpha_to_coverage,
|
||||
ctx->depth_stencil->zs_always_passes);
|
||||
ctx->depth_stencil->zs_always_passes, false);
|
||||
|
||||
cfg.flags_0.pixel_kill_operation = earlyzs.kill;
|
||||
cfg.flags_0.zs_update_operation = earlyzs.update;
|
||||
|
|
|
|||
|
|
@ -355,6 +355,7 @@ panfrost_new_variant_locked(struct panfrost_context *ctx,
|
|||
struct panfrost_uncompiled_shader *uncompiled,
|
||||
struct panfrost_shader_key *key)
|
||||
{
|
||||
struct panfrost_device *dev = pan_device(ctx->base.screen);
|
||||
struct panfrost_compiled_shader *prog = panfrost_alloc_variant(uncompiled);
|
||||
|
||||
*prog = (struct panfrost_compiled_shader){
|
||||
|
|
@ -365,7 +366,7 @@ panfrost_new_variant_locked(struct panfrost_context *ctx,
|
|||
panfrost_shader_get(ctx->base.screen, &ctx->shaders, &ctx->descs, uncompiled,
|
||||
&ctx->base.debug, prog, 0);
|
||||
|
||||
prog->earlyzs = pan_earlyzs_analyze(&prog->info);
|
||||
prog->earlyzs = pan_earlyzs_analyze(&prog->info, dev->arch);
|
||||
|
||||
return prog;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -30,9 +30,9 @@
|
|||
* force early.
|
||||
*/
|
||||
static enum pan_earlyzs
|
||||
best_early_mode(bool zs_always_passes)
|
||||
best_early_mode(bool zs_always_passes, bool force_early)
|
||||
{
|
||||
if (zs_always_passes)
|
||||
if (zs_always_passes && !force_early)
|
||||
return PAN_EARLYZS_WEAK_EARLY;
|
||||
else
|
||||
return PAN_EARLYZS_FORCE_EARLY;
|
||||
|
|
@ -45,7 +45,8 @@ best_early_mode(bool zs_always_passes)
|
|||
*/
|
||||
static struct pan_earlyzs_state
|
||||
analyze(const struct pan_shader_info *s, bool writes_zs_or_oq,
|
||||
bool alpha_to_coverage, bool zs_always_passes)
|
||||
bool alpha_to_coverage, bool zs_always_passes,
|
||||
bool shader_reads_zs, bool can_optimize_shader_readonly_zs)
|
||||
{
|
||||
/* If the shader writes depth or stencil, all depth/stencil tests must
|
||||
* be deferred until the value is known after the ZS_EMIT instruction,
|
||||
|
|
@ -62,6 +63,8 @@ analyze(const struct pan_shader_info *s, bool writes_zs_or_oq,
|
|||
bool shader_writes_zs = (s->fs.writes_depth || s->fs.writes_stencil);
|
||||
bool late_update = shader_writes_zs || alpha_to_coverage;
|
||||
bool late_kill = shader_writes_zs;
|
||||
bool force_early_update = s->fs.early_fragment_tests;
|
||||
bool force_early_kill = s->fs.early_fragment_tests;
|
||||
|
||||
/* Late coverage updates are required if the coverage mask depends on
|
||||
* the results of the shader. Discards are implemented as coverage mask
|
||||
|
|
@ -90,16 +93,37 @@ analyze(const struct pan_shader_info *s, bool writes_zs_or_oq,
|
|||
*/
|
||||
late_kill |= s->writes_global;
|
||||
|
||||
/* Shader reads require late depth/stencil tests to ensure the shader
|
||||
* isn't killed before the side effects execute, unless the HW supports
|
||||
* read-only ZS optimization, in which case it can be lowered to
|
||||
* force-early. */
|
||||
bool optimize_shader_read_only_zs = false;
|
||||
if (shader_reads_zs) {
|
||||
if (!late_update && can_optimize_shader_readonly_zs) {
|
||||
optimize_shader_read_only_zs = true;
|
||||
force_early_update |= true;
|
||||
} else {
|
||||
late_update |= true;
|
||||
}
|
||||
|
||||
if (!late_kill && can_optimize_shader_readonly_zs) {
|
||||
optimize_shader_read_only_zs = true;
|
||||
force_early_kill |= true;
|
||||
}
|
||||
}
|
||||
|
||||
/* Finally, the shader may override and force early fragment tests */
|
||||
late_update &= !s->fs.early_fragment_tests;
|
||||
late_kill &= !s->fs.early_fragment_tests;
|
||||
|
||||
/* Collect results */
|
||||
enum pan_earlyzs early_mode = best_early_mode(zs_always_passes);
|
||||
|
||||
return (struct pan_earlyzs_state){
|
||||
.update = late_update ? PAN_EARLYZS_FORCE_LATE : early_mode,
|
||||
.kill = late_kill ? PAN_EARLYZS_FORCE_LATE : early_mode,
|
||||
.update = late_update
|
||||
? PAN_EARLYZS_FORCE_LATE
|
||||
: best_early_mode(zs_always_passes, force_early_update),
|
||||
.kill = late_kill ? PAN_EARLYZS_FORCE_LATE
|
||||
: best_early_mode(zs_always_passes, force_early_kill),
|
||||
.shader_readonly_zs = optimize_shader_read_only_zs,
|
||||
};
|
||||
}
|
||||
|
||||
|
|
@ -108,14 +132,20 @@ analyze(const struct pan_shader_info *s, bool writes_zs_or_oq,
|
|||
* Returns a lookup table of configurations indexed by the API state.
|
||||
*/
|
||||
struct pan_earlyzs_lut
|
||||
pan_earlyzs_analyze(const struct pan_shader_info *s)
|
||||
pan_earlyzs_analyze(const struct pan_shader_info *s, unsigned arch)
|
||||
{
|
||||
/* Shader read-only ZS optimization appeared in v10. */
|
||||
bool can_optimize_shader_readonly_zs = arch >= 10;
|
||||
struct pan_earlyzs_lut lut;
|
||||
|
||||
for (unsigned v0 = 0; v0 < 2; ++v0) {
|
||||
for (unsigned v1 = 0; v1 < 2; ++v1) {
|
||||
for (unsigned v2 = 0; v2 < 2; ++v2)
|
||||
lut.states[v0][v1][v2] = analyze(s, v0, v1, v2);
|
||||
for (unsigned v2 = 0; v2 < 2; ++v2) {
|
||||
for (unsigned v3 = 0; v3 < 2; ++v3) {
|
||||
lut.states[v0][v1][v2][v3] =
|
||||
analyze(s, v0, v1, v2, v3, can_optimize_shader_readonly_zs);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -45,8 +45,11 @@ struct pan_earlyzs_state {
|
|||
/* Pixel kill */
|
||||
enum pan_earlyzs kill : 2;
|
||||
|
||||
/* True if the shader read-only ZS optimization should be enabled */
|
||||
bool shader_readonly_zs : 1;
|
||||
|
||||
/* So it fits in a byte */
|
||||
unsigned padding : 4;
|
||||
unsigned padding : 3;
|
||||
};
|
||||
|
||||
/* Internal lookup table. Users should treat as an opaque structure and only
|
||||
|
|
@ -54,7 +57,7 @@ struct pan_earlyzs_state {
|
|||
* for definition of the arrays.
|
||||
*/
|
||||
struct pan_earlyzs_lut {
|
||||
struct pan_earlyzs_state states[2][2][2];
|
||||
struct pan_earlyzs_state states[2][2][2][2];
|
||||
};
|
||||
|
||||
/*
|
||||
|
|
@ -63,14 +66,17 @@ struct pan_earlyzs_lut {
|
|||
*/
|
||||
static inline struct pan_earlyzs_state
|
||||
pan_earlyzs_get(struct pan_earlyzs_lut lut, bool writes_zs_or_oq,
|
||||
bool alpha_to_coverage, bool zs_always_passes)
|
||||
bool alpha_to_coverage, bool zs_always_passes,
|
||||
bool shader_reads_zs)
|
||||
{
|
||||
return lut.states[writes_zs_or_oq][alpha_to_coverage][zs_always_passes];
|
||||
return lut.states[writes_zs_or_oq][alpha_to_coverage][zs_always_passes]
|
||||
[shader_reads_zs];
|
||||
}
|
||||
|
||||
struct pan_shader_info;
|
||||
|
||||
struct pan_earlyzs_lut pan_earlyzs_analyze(const struct pan_shader_info *s);
|
||||
struct pan_earlyzs_lut pan_earlyzs_analyze(const struct pan_shader_info *s,
|
||||
unsigned arch);
|
||||
|
||||
#ifdef __cplusplus
|
||||
} /* extern C */
|
||||
|
|
|
|||
|
|
@ -34,15 +34,17 @@
|
|||
* under test, only the external API. So we test only the composition.
|
||||
*/
|
||||
|
||||
#define ZS_WRITEMASK BITFIELD_BIT(0)
|
||||
#define ALPHA2COV BITFIELD_BIT(1)
|
||||
#define ZS_ALWAYS_PASSES BITFIELD_BIT(2)
|
||||
#define DISCARD BITFIELD_BIT(3)
|
||||
#define WRITES_Z BITFIELD_BIT(4)
|
||||
#define WRITES_S BITFIELD_BIT(5)
|
||||
#define WRITES_COV BITFIELD_BIT(6)
|
||||
#define SIDEFX BITFIELD_BIT(7)
|
||||
#define API_EARLY BITFIELD_BIT(8)
|
||||
#define ZS_WRITEMASK BITFIELD_BIT(0)
|
||||
#define ALPHA2COV BITFIELD_BIT(1)
|
||||
#define ZS_ALWAYS_PASSES BITFIELD_BIT(2)
|
||||
#define DISCARD BITFIELD_BIT(3)
|
||||
#define WRITES_Z BITFIELD_BIT(4)
|
||||
#define WRITES_S BITFIELD_BIT(5)
|
||||
#define WRITES_COV BITFIELD_BIT(6)
|
||||
#define SIDEFX BITFIELD_BIT(7)
|
||||
#define API_EARLY BITFIELD_BIT(8)
|
||||
#define SHADER_READS_ZS BITFIELD_BIT(9)
|
||||
#define ARCH_HAS_READONLY_ZS_OPT BITFIELD_BIT(10)
|
||||
|
||||
static void
|
||||
test(enum pan_earlyzs expected_update, enum pan_earlyzs expected_kill,
|
||||
|
|
@ -56,9 +58,10 @@ test(enum pan_earlyzs expected_update, enum pan_earlyzs expected_kill,
|
|||
info.fs.early_fragment_tests = !!(flags & API_EARLY);
|
||||
info.writes_global = !!(flags & SIDEFX);
|
||||
|
||||
struct pan_earlyzs_state result =
|
||||
pan_earlyzs_get(pan_earlyzs_analyze(&info), !!(flags & ZS_WRITEMASK),
|
||||
!!(flags & ALPHA2COV), !!(flags & ZS_ALWAYS_PASSES));
|
||||
struct pan_earlyzs_state result = pan_earlyzs_get(
|
||||
pan_earlyzs_analyze(&info, flags & ARCH_HAS_READONLY_ZS_OPT ? 10 : 9),
|
||||
!!(flags & ZS_WRITEMASK), !!(flags & ALPHA2COV),
|
||||
!!(flags & ZS_ALWAYS_PASSES), !!(flags & SHADER_READS_ZS));
|
||||
|
||||
ASSERT_EQ(result.update, expected_update);
|
||||
ASSERT_EQ(result.kill, expected_kill);
|
||||
|
|
@ -134,6 +137,15 @@ TEST(EarlyZS, NoSideFXNoShaderZS)
|
|||
CASE(FORCE_EARLY, FORCE_EARLY, ZS_WRITEMASK);
|
||||
}
|
||||
|
||||
/* Early-ZS results when the fragment shader reads the ZS tile buffer
 * (SHADER_READS_ZS). ARCH_HAS_READONLY_ZS_OPT makes test() analyze the shader
 * for arch 10 instead of arch 9, i.e. with the shader read-only ZS
 * optimization available.
 *
 * NOTE(review): CASE is defined outside this view; its arguments appear to be
 * (expected update, expected kill, flags), mirroring test() -- confirm.
 */
TEST(EarlyZS, ShaderReadOnlyZS)
|
||||
{
|
||||
/* Side effects, optimization unavailable: update and kill both forced late. */
CASE(FORCE_LATE, FORCE_LATE, SIDEFX | SHADER_READS_ZS);
|
||||
/* Side effects, optimization available: the update becomes force-early while
 * the kill stays late because of the side effects. */
CASE(FORCE_EARLY, FORCE_LATE, SIDEFX | SHADER_READS_ZS | ARCH_HAS_READONLY_ZS_OPT);
|
||||
/* No side effects, optimization available: both update and kill force-early. */
CASE(FORCE_EARLY, FORCE_EARLY, SHADER_READS_ZS | ARCH_HAS_READONLY_ZS_OPT);
|
||||
/* Optimization unavailable, ZS test always passes: late update, weak-early
 * kill. */
CASE(FORCE_LATE, WEAK_EARLY, SHADER_READS_ZS | ZS_ALWAYS_PASSES);
|
||||
/* Optimization unavailable, ZS test may fail: late update, force-early kill. */
CASE(FORCE_LATE, FORCE_EARLY, SHADER_READS_ZS);
|
||||
}
|
||||
|
||||
TEST(EarlyZS, NoSideFXNoShaderZSAlt)
|
||||
{
|
||||
CASE(WEAK_EARLY, WEAK_EARLY, ZS_ALWAYS_PASSES);
|
||||
|
|
|
|||
|
|
@ -1534,7 +1534,7 @@ prepare_dcd(struct panvk_cmd_buffer *cmdbuf)
|
|||
|
||||
struct pan_earlyzs_state earlyzs =
|
||||
pan_earlyzs_get(fs->fs.earlyzs_lut, writes_zs || oq,
|
||||
alpha_to_coverage, zs_always_passes);
|
||||
alpha_to_coverage, zs_always_passes, false);
|
||||
|
||||
cfg.pixel_kill_operation = (enum mali_pixel_kill)earlyzs.kill;
|
||||
cfg.zs_update_operation = (enum mali_pixel_kill)earlyzs.update;
|
||||
|
|
|
|||
|
|
@ -296,7 +296,7 @@ panvk_draw_prepare_fs_rsd(struct panvk_cmd_buffer *cmdbuf,
|
|||
|
||||
struct pan_earlyzs_state earlyzs =
|
||||
pan_earlyzs_get(fs->fs.earlyzs_lut, writes_zs || oq,
|
||||
alpha_to_coverage, zs_always_passes);
|
||||
alpha_to_coverage, zs_always_passes, false);
|
||||
|
||||
cfg.properties.pixel_kill_operation = earlyzs.kill;
|
||||
cfg.properties.zs_update_operation = earlyzs.update;
|
||||
|
|
|
|||
|
|
@ -922,7 +922,7 @@ panvk_compile_nir(struct panvk_device *dev, nir_shader *nir,
|
|||
break;
|
||||
|
||||
case MESA_SHADER_FRAGMENT:
|
||||
shader->fs.earlyzs_lut = pan_earlyzs_analyze(&shader->info);
|
||||
shader->fs.earlyzs_lut = pan_earlyzs_analyze(&shader->info, PAN_ARCH);
|
||||
break;
|
||||
|
||||
default:
|
||||
|
|
@ -1339,7 +1339,7 @@ panvk_deserialize_shader(struct vk_device *vk_dev, struct blob_reader *blob,
|
|||
break;
|
||||
|
||||
case MESA_SHADER_FRAGMENT:
|
||||
shader->fs.earlyzs_lut = pan_earlyzs_analyze(&shader->info);
|
||||
shader->fs.earlyzs_lut = pan_earlyzs_analyze(&shader->info, PAN_ARCH);
|
||||
break;
|
||||
|
||||
default:
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue