mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-23 15:30:14 +01:00
anv: stop using 3DSTATE_WM::ForceThreadDispatchEnable
Documentation says we should leave this field at its default value (Normal). Instead, we set 3DSTATE_PS_EXTRA::PixelShaderHasUAV when we see that a fragment shader has side effects.

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Reviewed-by: Tapani Pälli <tapani.palli@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/30408>
This commit is contained in:
parent
c818de7360
commit
eebb6cd236
3 changed files with 29 additions and 36 deletions
|
|
@ -1583,6 +1583,7 @@ struct anv_gfx_dynamic_state {
|
||||||
|
|
||||||
/* 3DSTATE_PS_EXTRA */
|
/* 3DSTATE_PS_EXTRA */
|
||||||
struct {
|
struct {
|
||||||
|
bool PixelShaderHasUAV;
|
||||||
bool PixelShaderIsPerSample;
|
bool PixelShaderIsPerSample;
|
||||||
bool PixelShaderKillsPixel;
|
bool PixelShaderKillsPixel;
|
||||||
bool PixelShaderIsPerCoarsePixel;
|
bool PixelShaderIsPerCoarsePixel;
|
||||||
|
|
@ -1711,7 +1712,6 @@ struct anv_gfx_dynamic_state {
|
||||||
|
|
||||||
/* 3DSTATE_WM */
|
/* 3DSTATE_WM */
|
||||||
struct {
|
struct {
|
||||||
uint32_t ForceThreadDispatchEnable;
|
|
||||||
bool LineStippleEnable;
|
bool LineStippleEnable;
|
||||||
uint32_t BarycentricInterpolationMode;
|
uint32_t BarycentricInterpolationMode;
|
||||||
} wm;
|
} wm;
|
||||||
|
|
@ -4707,7 +4707,6 @@ struct anv_graphics_pipeline {
|
||||||
bool rp_has_ds_self_dep;
|
bool rp_has_ds_self_dep;
|
||||||
|
|
||||||
bool kill_pixel;
|
bool kill_pixel;
|
||||||
bool force_fragment_thread_dispatch;
|
|
||||||
bool uses_xfb;
|
bool uses_xfb;
|
||||||
bool sample_shading_enable;
|
bool sample_shading_enable;
|
||||||
float min_sample_shading;
|
float min_sample_shading;
|
||||||
|
|
|
||||||
|
|
@ -1012,19 +1012,6 @@ genX(cmd_buffer_flush_gfx_runtime_state)(struct anv_cmd_buffer *cmd_buffer)
|
||||||
BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_MS_SAMPLE_LOCATIONS_ENABLE)))
|
BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_MS_SAMPLE_LOCATIONS_ENABLE)))
|
||||||
BITSET_SET(hw_state->dirty, ANV_GFX_STATE_SAMPLE_PATTERN);
|
BITSET_SET(hw_state->dirty, ANV_GFX_STATE_SAMPLE_PATTERN);
|
||||||
|
|
||||||
if ((gfx->dirty & ANV_CMD_DIRTY_PIPELINE) ||
|
|
||||||
(gfx->dirty & ANV_CMD_DIRTY_RENDER_TARGETS) ||
|
|
||||||
BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_CB_COLOR_WRITE_ENABLES)) {
|
|
||||||
/* 3DSTATE_WM in the hope we can avoid spawning fragment shader
|
|
||||||
* threads.
|
|
||||||
*/
|
|
||||||
bool force_thread_dispatch =
|
|
||||||
anv_pipeline_has_stage(pipeline, MESA_SHADER_FRAGMENT) &&
|
|
||||||
(pipeline->force_fragment_thread_dispatch ||
|
|
||||||
anv_cmd_buffer_all_color_write_masked(cmd_buffer));
|
|
||||||
SET(WM, wm.ForceThreadDispatchEnable, force_thread_dispatch ? ForceON : 0);
|
|
||||||
}
|
|
||||||
|
|
||||||
if ((cmd_buffer->state.gfx.dirty & ANV_CMD_DIRTY_PIPELINE) ||
|
if ((cmd_buffer->state.gfx.dirty & ANV_CMD_DIRTY_PIPELINE) ||
|
||||||
BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_ATTACHMENT_FEEDBACK_LOOP_ENABLE)) {
|
BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_ATTACHMENT_FEEDBACK_LOOP_ENABLE)) {
|
||||||
SET_STAGE(PS_EXTRA, ps_extra.PixelShaderKillsPixel,
|
SET_STAGE(PS_EXTRA, ps_extra.PixelShaderKillsPixel,
|
||||||
|
|
@ -1034,6 +1021,33 @@ genX(cmd_buffer_flush_gfx_runtime_state)(struct anv_cmd_buffer *cmd_buffer)
|
||||||
FRAGMENT);
|
FRAGMENT);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#if GFX_VERx10 >= 125
|
||||||
|
if (cmd_buffer->state.gfx.dirty & ANV_CMD_DIRTY_PIPELINE) {
|
||||||
|
SET_STAGE(PS_EXTRA, ps_extra.PixelShaderHasUAV,
|
||||||
|
wm_prog_data && wm_prog_data->has_side_effects,
|
||||||
|
FRAGMENT);
|
||||||
|
}
|
||||||
|
#else
|
||||||
|
if (cmd_buffer->state.gfx.dirty & (ANV_CMD_DIRTY_PIPELINE |
|
||||||
|
ANV_CMD_DIRTY_OCCLUSION_QUERY_ACTIVE)) {
|
||||||
|
/* Prior to Gfx12.5 the HW seems to avoid spawning fragment shaders even
|
||||||
|
* if 3DSTATE_PS_EXTRA::PixelShaderKillsPixel=true when
|
||||||
|
* 3DSTATE_PS_BLEND::HasWriteableRT=false. This is causing problems with
|
||||||
|
* occlusion queries with 0 attachments. There are no CTS tests
|
||||||
|
* exercising this but zink+anv fails a bunch of tests like piglit
|
||||||
|
* arb_framebuffer_no_attachments-query.
|
||||||
|
*
|
||||||
|
* Here we choose to tweak the PixelShaderHasUAV to make sure the
|
||||||
|
* fragment shaders are run properly.
|
||||||
|
*/
|
||||||
|
SET_STAGE(PS_EXTRA, ps_extra.PixelShaderHasUAV,
|
||||||
|
wm_prog_data && (wm_prog_data->has_side_effects ||
|
||||||
|
(gfx->color_att_count == 0 &&
|
||||||
|
gfx->n_occlusion_queries > 0)),
|
||||||
|
FRAGMENT);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
if ((gfx->dirty & ANV_CMD_DIRTY_PIPELINE) ||
|
if ((gfx->dirty & ANV_CMD_DIRTY_PIPELINE) ||
|
||||||
(gfx->dirty & ANV_CMD_DIRTY_RENDER_TARGETS) ||
|
(gfx->dirty & ANV_CMD_DIRTY_RENDER_TARGETS) ||
|
||||||
BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_CB_LOGIC_OP) ||
|
BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_CB_LOGIC_OP) ||
|
||||||
|
|
@ -1759,6 +1773,7 @@ cmd_buffer_gfx_state_emission(struct anv_cmd_buffer *cmd_buffer)
|
||||||
if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_PS_EXTRA)) {
|
if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_PS_EXTRA)) {
|
||||||
anv_batch_emit_merge(&cmd_buffer->batch, GENX(3DSTATE_PS_EXTRA),
|
anv_batch_emit_merge(&cmd_buffer->batch, GENX(3DSTATE_PS_EXTRA),
|
||||||
pipeline, partial.ps_extra, pse) {
|
pipeline, partial.ps_extra, pse) {
|
||||||
|
SET(pse, ps_extra, PixelShaderHasUAV);
|
||||||
SET(pse, ps_extra, PixelShaderIsPerSample);
|
SET(pse, ps_extra, PixelShaderIsPerSample);
|
||||||
#if GFX_VER >= 11
|
#if GFX_VER >= 11
|
||||||
SET(pse, ps_extra, PixelShaderIsPerCoarsePixel);
|
SET(pse, ps_extra, PixelShaderIsPerCoarsePixel);
|
||||||
|
|
@ -2136,7 +2151,6 @@ cmd_buffer_gfx_state_emission(struct anv_cmd_buffer *cmd_buffer)
|
||||||
if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_WM)) {
|
if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_WM)) {
|
||||||
anv_batch_emit_merge(&cmd_buffer->batch, GENX(3DSTATE_WM),
|
anv_batch_emit_merge(&cmd_buffer->batch, GENX(3DSTATE_WM),
|
||||||
pipeline, partial.wm, wm) {
|
pipeline, partial.wm, wm) {
|
||||||
SET(wm, wm, ForceThreadDispatchEnable);
|
|
||||||
SET(wm, wm, LineStippleEnable);
|
SET(wm, wm, LineStippleEnable);
|
||||||
SET(wm, wm, BarycentricInterpolationMode);
|
SET(wm, wm, BarycentricInterpolationMode);
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -1642,26 +1642,6 @@ emit_3dstate_wm(struct anv_graphics_pipeline *pipeline,
|
||||||
} else {
|
} else {
|
||||||
wm.EarlyDepthStencilControl = EDSC_NORMAL;
|
wm.EarlyDepthStencilControl = EDSC_NORMAL;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Gen8 hardware tries to compute ThreadDispatchEnable for us but
|
|
||||||
* doesn't take into account KillPixels when no depth or stencil
|
|
||||||
* writes are enabled. In order for occlusion queries to work
|
|
||||||
* correctly with no attachments, we need to force-enable PS thread
|
|
||||||
* dispatch.
|
|
||||||
*
|
|
||||||
* The BDW docs are pretty clear that this bit isn't validated
|
|
||||||
* and probably shouldn't be used in production:
|
|
||||||
*
|
|
||||||
* "This must always be set to Normal. This field should not be
|
|
||||||
* tested for functional validation."
|
|
||||||
*
|
|
||||||
* Unfortunately, however, the other mechanism we have for doing this
|
|
||||||
* is 3DSTATE_PS_EXTRA::PixelShaderHasUAV which causes hangs on BDW.
|
|
||||||
* Given two bad options, we choose the one which works.
|
|
||||||
*/
|
|
||||||
pipeline->force_fragment_thread_dispatch =
|
|
||||||
wm_prog_data->has_side_effects ||
|
|
||||||
wm_prog_data->uses_kill;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue