anv: stop using 3DSTATE_WM::ForceThreadDispatchEnable

The documentation says this field should be left at its default value
(Normal). Instead, we set 3DSTATE_PS_EXTRA::PixelShaderHasUAV when we
see that a fragment shader has side effects.

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Reviewed-by: Tapani Pälli <tapani.palli@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/30408>
This commit is contained in:
Lionel Landwerlin 2024-07-28 00:36:54 +03:00 committed by Marge Bot
parent c818de7360
commit eebb6cd236
3 changed files with 29 additions and 36 deletions

View file

@ -1583,6 +1583,7 @@ struct anv_gfx_dynamic_state {
/* 3DSTATE_PS_EXTRA */ /* 3DSTATE_PS_EXTRA */
struct { struct {
bool PixelShaderHasUAV;
bool PixelShaderIsPerSample; bool PixelShaderIsPerSample;
bool PixelShaderKillsPixel; bool PixelShaderKillsPixel;
bool PixelShaderIsPerCoarsePixel; bool PixelShaderIsPerCoarsePixel;
@ -1711,7 +1712,6 @@ struct anv_gfx_dynamic_state {
/* 3DSTATE_WM */ /* 3DSTATE_WM */
struct { struct {
uint32_t ForceThreadDispatchEnable;
bool LineStippleEnable; bool LineStippleEnable;
uint32_t BarycentricInterpolationMode; uint32_t BarycentricInterpolationMode;
} wm; } wm;
@ -4707,7 +4707,6 @@ struct anv_graphics_pipeline {
bool rp_has_ds_self_dep; bool rp_has_ds_self_dep;
bool kill_pixel; bool kill_pixel;
bool force_fragment_thread_dispatch;
bool uses_xfb; bool uses_xfb;
bool sample_shading_enable; bool sample_shading_enable;
float min_sample_shading; float min_sample_shading;

View file

@ -1012,19 +1012,6 @@ genX(cmd_buffer_flush_gfx_runtime_state)(struct anv_cmd_buffer *cmd_buffer)
BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_MS_SAMPLE_LOCATIONS_ENABLE))) BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_MS_SAMPLE_LOCATIONS_ENABLE)))
BITSET_SET(hw_state->dirty, ANV_GFX_STATE_SAMPLE_PATTERN); BITSET_SET(hw_state->dirty, ANV_GFX_STATE_SAMPLE_PATTERN);
if ((gfx->dirty & ANV_CMD_DIRTY_PIPELINE) ||
(gfx->dirty & ANV_CMD_DIRTY_RENDER_TARGETS) ||
BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_CB_COLOR_WRITE_ENABLES)) {
/* 3DSTATE_WM in the hope we can avoid spawning fragment shader
* threads.
*/
bool force_thread_dispatch =
anv_pipeline_has_stage(pipeline, MESA_SHADER_FRAGMENT) &&
(pipeline->force_fragment_thread_dispatch ||
anv_cmd_buffer_all_color_write_masked(cmd_buffer));
SET(WM, wm.ForceThreadDispatchEnable, force_thread_dispatch ? ForceON : 0);
}
if ((cmd_buffer->state.gfx.dirty & ANV_CMD_DIRTY_PIPELINE) || if ((cmd_buffer->state.gfx.dirty & ANV_CMD_DIRTY_PIPELINE) ||
BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_ATTACHMENT_FEEDBACK_LOOP_ENABLE)) { BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_ATTACHMENT_FEEDBACK_LOOP_ENABLE)) {
SET_STAGE(PS_EXTRA, ps_extra.PixelShaderKillsPixel, SET_STAGE(PS_EXTRA, ps_extra.PixelShaderKillsPixel,
@ -1034,6 +1021,33 @@ genX(cmd_buffer_flush_gfx_runtime_state)(struct anv_cmd_buffer *cmd_buffer)
FRAGMENT); FRAGMENT);
} }
#if GFX_VERx10 >= 125
if (cmd_buffer->state.gfx.dirty & ANV_CMD_DIRTY_PIPELINE) {
SET_STAGE(PS_EXTRA, ps_extra.PixelShaderHasUAV,
wm_prog_data && wm_prog_data->has_side_effects,
FRAGMENT);
}
#else
if (cmd_buffer->state.gfx.dirty & (ANV_CMD_DIRTY_PIPELINE |
ANV_CMD_DIRTY_OCCLUSION_QUERY_ACTIVE)) {
/* Prior to Gfx12.5 the HW seems to avoid spawning fragment shaders even
* if 3DSTATE_PS_EXTRA::PixelShaderKillsPixel=true when
* 3DSTATE_PS_BLEND::HasWriteableRT=false. This is causing problems with
* occlusion queries with 0 attachments. There are no CTS tests
* exercising this but zink+anv fails a bunch of tests like piglit
* arb_framebuffer_no_attachments-query.
*
* Here we choose to tweak the PixelShaderHasUAV to make sure the
* fragment shaders are run properly.
*/
SET_STAGE(PS_EXTRA, ps_extra.PixelShaderHasUAV,
wm_prog_data && (wm_prog_data->has_side_effects ||
(gfx->color_att_count == 0 &&
gfx->n_occlusion_queries > 0)),
FRAGMENT);
}
#endif
if ((gfx->dirty & ANV_CMD_DIRTY_PIPELINE) || if ((gfx->dirty & ANV_CMD_DIRTY_PIPELINE) ||
(gfx->dirty & ANV_CMD_DIRTY_RENDER_TARGETS) || (gfx->dirty & ANV_CMD_DIRTY_RENDER_TARGETS) ||
BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_CB_LOGIC_OP) || BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_CB_LOGIC_OP) ||
@ -1759,6 +1773,7 @@ cmd_buffer_gfx_state_emission(struct anv_cmd_buffer *cmd_buffer)
if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_PS_EXTRA)) { if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_PS_EXTRA)) {
anv_batch_emit_merge(&cmd_buffer->batch, GENX(3DSTATE_PS_EXTRA), anv_batch_emit_merge(&cmd_buffer->batch, GENX(3DSTATE_PS_EXTRA),
pipeline, partial.ps_extra, pse) { pipeline, partial.ps_extra, pse) {
SET(pse, ps_extra, PixelShaderHasUAV);
SET(pse, ps_extra, PixelShaderIsPerSample); SET(pse, ps_extra, PixelShaderIsPerSample);
#if GFX_VER >= 11 #if GFX_VER >= 11
SET(pse, ps_extra, PixelShaderIsPerCoarsePixel); SET(pse, ps_extra, PixelShaderIsPerCoarsePixel);
@ -2136,7 +2151,6 @@ cmd_buffer_gfx_state_emission(struct anv_cmd_buffer *cmd_buffer)
if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_WM)) { if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_WM)) {
anv_batch_emit_merge(&cmd_buffer->batch, GENX(3DSTATE_WM), anv_batch_emit_merge(&cmd_buffer->batch, GENX(3DSTATE_WM),
pipeline, partial.wm, wm) { pipeline, partial.wm, wm) {
SET(wm, wm, ForceThreadDispatchEnable);
SET(wm, wm, LineStippleEnable); SET(wm, wm, LineStippleEnable);
SET(wm, wm, BarycentricInterpolationMode); SET(wm, wm, BarycentricInterpolationMode);
} }

View file

@ -1642,26 +1642,6 @@ emit_3dstate_wm(struct anv_graphics_pipeline *pipeline,
} else { } else {
wm.EarlyDepthStencilControl = EDSC_NORMAL; wm.EarlyDepthStencilControl = EDSC_NORMAL;
} }
/* Gen8 hardware tries to compute ThreadDispatchEnable for us but
* doesn't take into account KillPixels when no depth or stencil
* writes are enabled. In order for occlusion queries to work
* correctly with no attachments, we need to force-enable PS thread
* dispatch.
*
* The BDW docs are pretty clear that this bit isn't validated
* and probably shouldn't be used in production:
*
* "This must always be set to Normal. This field should not be
* tested for functional validation."
*
* Unfortunately, however, the other mechanism we have for doing this
* is 3DSTATE_PS_EXTRA::PixelShaderHasUAV which causes hangs on BDW.
* Given two bad options, we choose the one which works.
*/
pipeline->force_fragment_thread_dispatch =
wm_prog_data->has_side_effects ||
wm_prog_data->uses_kill;
} }
} }
} }