crocus/gen8: add PMA fix from iris

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/11507>
This commit is contained in:
Dave Airlie 2021-06-22 04:57:07 +10:00 committed by Marge Bot
parent 786cf437db
commit 750e0c1ff3
5 changed files with 200 additions and 1 deletions

View file

@ -286,6 +286,11 @@ crocus_blorp_exec(struct blorp_batch *blorp_batch,
crocus_require_command_space(batch, 1400);
crocus_require_statebuffer_space(batch, 600);
batch->no_wrap = true;
#if GFX_VER == 8
genX(crocus_update_pma_fix)(ice, batch, false);
#endif
#if GFX_VER == 6
/* Emit workaround flushes when we switch from drawing to blorping. */
crocus_emit_post_sync_nonzero_flush(batch);

View file

@ -113,6 +113,7 @@ enum {
#define CROCUS_DIRTY_GEN6_SAMPLER_STATE_POINTERS (1ull << 35)
#define CROCUS_DIRTY_GEN6_SVBI (1ull << 36)
#define CROCUS_DIRTY_GEN8_VF_TOPOLOGY (1ull << 37)
#define CROCUS_DIRTY_GEN8_PMA_FIX (1ull << 38)
#define CROCUS_ALL_DIRTY_FOR_COMPUTE (CROCUS_DIRTY_COMPUTE_RESOLVES_AND_FLUSHES)

View file

@ -34,7 +34,9 @@ void genX(crocus_upload_urb)(struct crocus_batch *batch,
unsigned vs_size,
bool gs_present,
unsigned gs_size);
/* Switch the Gfx8 depth PMA fix on or off for this batch.  The definition
 * only has a body when GFX_VER == 8; there it returns early if the cached
 * per-context state already equals 'enable', and otherwise writes
 * CACHE_MODE_1 bracketed by PIPE_CONTROL flushes.
 */
void genX(crocus_update_pma_fix)(struct crocus_context *ice,
struct crocus_batch *batch,
bool enable);
/* crocus_blorp.c */
void genX(crocus_init_blorp)(struct crocus_context *ice);

View file

@ -3122,6 +3122,8 @@ static void
crocus_bind_fs_state(struct pipe_context *ctx, void *state)
{
struct crocus_context *ice = (struct crocus_context *) ctx;
struct crocus_screen *screen = (struct crocus_screen *) ctx->screen;
const struct intel_device_info *devinfo = &screen->devinfo;
struct crocus_uncompiled_shader *old_ish =
ice->shaders.uncompiled[MESA_SHADER_FRAGMENT];
struct crocus_uncompiled_shader *new_ish = state;
@ -3136,6 +3138,8 @@ crocus_bind_fs_state(struct pipe_context *ctx, void *state)
(new_ish->nir->info.outputs_written & color_bits))
ice->state.dirty |= CROCUS_DIRTY_WM;
if (devinfo->ver == 8)
ice->state.dirty |= CROCUS_DIRTY_GEN8_PMA_FIX;
bind_shader_state((void *) ctx, state, MESA_SHADER_FRAGMENT);
}

View file

@ -1430,6 +1430,10 @@ struct crocus_genx_state {
struct brw_image_param image_param[PIPE_MAX_SHADER_IMAGES];
#endif
} shaders[MESA_SHADER_STAGES];
#if GFX_VER == 8
bool pma_fix_enabled;
#endif
};
/**
@ -1599,6 +1603,9 @@ crocus_bind_blend_state(struct pipe_context *ctx, void *state)
#endif
#if GFX_VER >= 7
ice->state.stage_dirty |= CROCUS_STAGE_DIRTY_FS;
#endif
#if GFX_VER == 8
ice->state.dirty |= CROCUS_DIRTY_GEN8_PMA_FIX;
#endif
ice->state.dirty |= CROCUS_DIRTY_COLOR_CALC_STATE;
ice->state.dirty |= CROCUS_DIRTY_RENDER_RESOLVES_AND_FLUSHES;
@ -1702,10 +1709,183 @@ crocus_bind_zsa_state(struct pipe_context *ctx, void *state)
ice->state.dirty |= CROCUS_DIRTY_CC_VIEWPORT;
#if GFX_VER >= 6
ice->state.dirty |= CROCUS_DIRTY_GEN6_WM_DEPTH_STENCIL;
#endif
#if GFX_VER == 8
ice->state.dirty |= CROCUS_DIRTY_GEN8_PMA_FIX;
#endif
ice->state.stage_dirty |= ice->state.stage_dirty_for_nos[CROCUS_NOS_DEPTH_STENCIL_ALPHA];
}
#if GFX_VER == 8
static bool
want_pma_fix(struct crocus_context *ice)
{
UNUSED struct crocus_screen *screen = (void *) ice->ctx.screen;
UNUSED const struct intel_device_info *devinfo = &screen->devinfo;
const struct brw_wm_prog_data *wm_prog_data = (void *)
ice->shaders.prog[MESA_SHADER_FRAGMENT]->prog_data;
const struct pipe_framebuffer_state *cso_fb = &ice->state.framebuffer;
const struct crocus_depth_stencil_alpha_state *cso_zsa = ice->state.cso_zsa;
const struct crocus_blend_state *cso_blend = ice->state.cso_blend;
/* In very specific combinations of state, we can instruct Gfx8-9 hardware
* to avoid stalling at the pixel mask array. The state equations are
* documented in these places:
*
* - Gfx8 Depth PMA Fix: CACHE_MODE_1::NP_PMA_FIX_ENABLE
* - Gfx9 Stencil PMA Fix: CACHE_MODE_0::STC PMA Optimization Enable
*
* Both equations share some common elements:
*
* no_hiz_op =
* !(3DSTATE_WM_HZ_OP::DepthBufferClear ||
* 3DSTATE_WM_HZ_OP::DepthBufferResolve ||
* 3DSTATE_WM_HZ_OP::Hierarchical Depth Buffer Resolve Enable ||
* 3DSTATE_WM_HZ_OP::StencilBufferClear) &&
*
* killpixels =
* 3DSTATE_WM::ForceKillPix != ForceOff &&
* (3DSTATE_PS_EXTRA::PixelShaderKillsPixels ||
* 3DSTATE_PS_EXTRA::oMask Present to RenderTarget ||
* 3DSTATE_PS_BLEND::AlphaToCoverageEnable ||
* 3DSTATE_PS_BLEND::AlphaTestEnable ||
* 3DSTATE_WM_CHROMAKEY::ChromaKeyKillEnable)
*
* (Technically the stencil PMA treats ForceKillPix differently,
* but I think this is a documentation oversight, and we don't
* ever use it in this way, so it doesn't matter).
*
* common_pma_fix =
* 3DSTATE_WM::ForceThreadDispatch != 1 &&
* 3DSTATE_RASTER::ForceSampleCount == NUMRASTSAMPLES_0 &&
* 3DSTATE_DEPTH_BUFFER::SURFACE_TYPE != NULL &&
* 3DSTATE_DEPTH_BUFFER::HIZ Enable &&
* 3DSTATE_WM::EDSC_Mode != EDSC_PREPS &&
* 3DSTATE_PS_EXTRA::PixelShaderValid &&
* no_hiz_op
*
* These are always true:
*
* 3DSTATE_RASTER::ForceSampleCount == NUMRASTSAMPLES_0
* 3DSTATE_PS_EXTRA::PixelShaderValid
*
* Also, we never use the normal drawing path for HiZ ops; these are true:
*
* !(3DSTATE_WM_HZ_OP::DepthBufferClear ||
* 3DSTATE_WM_HZ_OP::DepthBufferResolve ||
* 3DSTATE_WM_HZ_OP::Hierarchical Depth Buffer Resolve Enable ||
* 3DSTATE_WM_HZ_OP::StencilBufferClear)
*
* This happens sometimes:
*
* 3DSTATE_WM::ForceThreadDispatch != 1
*
* However, we choose to ignore it as it either agrees with the signal
* (dispatch was already enabled, so nothing out of the ordinary), or
* there are no framebuffer attachments (so no depth or HiZ anyway,
* meaning the PMA signal will already be disabled).
*/
if (!cso_fb->zsbuf)
return false;
struct crocus_resource *zres, *sres;
crocus_get_depth_stencil_resources(devinfo,
cso_fb->zsbuf->texture, &zres, &sres);
/* 3DSTATE_DEPTH_BUFFER::SURFACE_TYPE != NULL &&
* 3DSTATE_DEPTH_BUFFER::HIZ Enable &&
*/
if (!zres || !crocus_resource_level_has_hiz(zres, cso_fb->zsbuf->u.tex.level))
return false;
/* 3DSTATE_WM::EDSC_Mode != EDSC_PREPS */
if (wm_prog_data->early_fragment_tests)
return false;
/* 3DSTATE_WM::ForceKillPix != ForceOff &&
* (3DSTATE_PS_EXTRA::PixelShaderKillsPixels ||
* 3DSTATE_PS_EXTRA::oMask Present to RenderTarget ||
* 3DSTATE_PS_BLEND::AlphaToCoverageEnable ||
* 3DSTATE_PS_BLEND::AlphaTestEnable ||
* 3DSTATE_WM_CHROMAKEY::ChromaKeyKillEnable)
*/
bool killpixels = wm_prog_data->uses_kill || wm_prog_data->uses_omask ||
cso_blend->cso.alpha_to_coverage || cso_zsa->cso.alpha_enabled;
/* The Gfx8 depth PMA equation becomes:
*
* depth_writes =
* 3DSTATE_WM_DEPTH_STENCIL::DepthWriteEnable &&
* 3DSTATE_DEPTH_BUFFER::DEPTH_WRITE_ENABLE
*
* stencil_writes =
* 3DSTATE_WM_DEPTH_STENCIL::Stencil Buffer Write Enable &&
* 3DSTATE_DEPTH_BUFFER::STENCIL_WRITE_ENABLE &&
* 3DSTATE_STENCIL_BUFFER::STENCIL_BUFFER_ENABLE
*
* Z_PMA_OPT =
* common_pma_fix &&
* 3DSTATE_WM_DEPTH_STENCIL::DepthTestEnable &&
* ((killpixels && (depth_writes || stencil_writes)) ||
* 3DSTATE_PS_EXTRA::PixelShaderComputedDepthMode != PSCDEPTH_OFF)
*
*/
if (!cso_zsa->cso.depth_enabled)
return false;
return wm_prog_data->computed_depth_mode != PSCDEPTH_OFF ||
(killpixels && (cso_zsa->depth_writes_enabled ||
(sres && cso_zsa->stencil_writes_enabled)));
}
#endif
/**
 * Turn the Gfx8 depth PMA fix on or off.
 *
 * On GFX_VER == 8 this compares the requested state with the value cached
 * in the per-context genx state; when they differ it updates the cache and
 * programs CACHE_MODE_1 (NPPMAFixEnable / NPEarlyZFailsDisable), with a
 * PIPE_CONTROL flush before and after the register write.  On every other
 * generation the function body is empty.
 *
 * \param ice     context whose cached PMA-fix state is consulted/updated
 * \param batch   batch the flushes and register write are emitted into
 * \param enable  desired state of the PMA fix
 */
void
genX(crocus_update_pma_fix)(struct crocus_context *ice,
                            struct crocus_batch *batch,
                            bool enable)
{
#if GFX_VER == 8
   struct crocus_genx_state *genx = ice->state.genx;

   /* Nothing to emit if the hardware is already in the requested state. */
   if (genx->pma_fix_enabled == enable)
      return;

   genx->pma_fix_enabled = enable;

   /* According to the Broadwell PIPE_CONTROL documentation, software should
    * emit a PIPE_CONTROL with the CS Stall and Depth Cache Flush bits set
    * prior to the LRI.  If stencil buffer writes are enabled, then a Render
    * Cache Flush is also necessary.
    *
    * The Gfx9 docs say to use a depth stall rather than a command streamer
    * stall.  However, the hardware seems to violently disagree.  A full
    * command streamer stall seems to be needed in both cases.
    */
   crocus_emit_pipe_control_flush(batch, "PMA fix change (1/2)",
                                  PIPE_CONTROL_CS_STALL |
                                  PIPE_CONTROL_DEPTH_CACHE_FLUSH |
                                  PIPE_CONTROL_RENDER_TARGET_FLUSH);

   /* CACHE_MODE_1 is a masked register: the *Mask bits select which fields
    * this write is allowed to change.
    */
   crocus_emit_reg(batch, GENX(CACHE_MODE_1), reg) {
      reg.NPPMAFixEnable = enable;
      reg.NPEarlyZFailsDisable = enable;
      reg.NPPMAFixEnableMask = true;
      reg.NPEarlyZFailsDisableMask = true;
   }

   /* After the LRI, a PIPE_CONTROL with both the Depth Stall and Depth Cache
    * Flush bits is often necessary.  We do it regardless because it's easier.
    * The render cache flush is also necessary if stencil writes are enabled.
    *
    * Again, the Gfx9 docs give a different set of flushes but the Broadwell
    * flushes seem to work just as well.
    */
   crocus_emit_pipe_control_flush(batch, "PMA fix change (2/2)",
                                  PIPE_CONTROL_DEPTH_STALL |
                                  PIPE_CONTROL_DEPTH_CACHE_FLUSH |
                                  PIPE_CONTROL_RENDER_TARGET_FLUSH);
#endif
}
static float
get_line_width(const struct pipe_rasterizer_state *state)
{
@ -7158,6 +7338,13 @@ crocus_upload_dirty_render_state(struct crocus_context *ice,
}
#endif
#if GFX_VER == 8
if (dirty & CROCUS_DIRTY_GEN8_PMA_FIX) {
bool enable = want_pma_fix(ice);
genX(crocus_update_pma_fix)(ice, batch, enable);
}
#endif
#if GFX_VER <= 5
if (dirty & CROCUS_DIRTY_GEN4_CURBE) {
gen4_upload_curbe(batch);