diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h index 20c23f1b8f8..fb23e278fb7 100644 --- a/src/intel/vulkan/anv_private.h +++ b/src/intel/vulkan/anv_private.h @@ -1583,6 +1583,7 @@ struct anv_gfx_dynamic_state { /* 3DSTATE_PS_EXTRA */ struct { + bool PixelShaderHasUAV; bool PixelShaderIsPerSample; bool PixelShaderKillsPixel; bool PixelShaderIsPerCoarsePixel; @@ -1711,7 +1712,6 @@ struct anv_gfx_dynamic_state { /* 3DSTATE_WM */ struct { - uint32_t ForceThreadDispatchEnable; bool LineStippleEnable; uint32_t BarycentricInterpolationMode; } wm; @@ -4707,7 +4707,6 @@ struct anv_graphics_pipeline { bool rp_has_ds_self_dep; bool kill_pixel; - bool force_fragment_thread_dispatch; bool uses_xfb; bool sample_shading_enable; float min_sample_shading; diff --git a/src/intel/vulkan/genX_gfx_state.c b/src/intel/vulkan/genX_gfx_state.c index 5cff74b171e..0513f792e81 100644 --- a/src/intel/vulkan/genX_gfx_state.c +++ b/src/intel/vulkan/genX_gfx_state.c @@ -1012,19 +1012,6 @@ genX(cmd_buffer_flush_gfx_runtime_state)(struct anv_cmd_buffer *cmd_buffer) BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_MS_SAMPLE_LOCATIONS_ENABLE))) BITSET_SET(hw_state->dirty, ANV_GFX_STATE_SAMPLE_PATTERN); - if ((gfx->dirty & ANV_CMD_DIRTY_PIPELINE) || - (gfx->dirty & ANV_CMD_DIRTY_RENDER_TARGETS) || - BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_CB_COLOR_WRITE_ENABLES)) { - /* 3DSTATE_WM in the hope we can avoid spawning fragment shaders - * threads. - */ - bool force_thread_dispatch = - anv_pipeline_has_stage(pipeline, MESA_SHADER_FRAGMENT) && - (pipeline->force_fragment_thread_dispatch || - anv_cmd_buffer_all_color_write_masked(cmd_buffer)); - SET(WM, wm.ForceThreadDispatchEnable, force_thread_dispatch ? ForceON : 0); - } - if ((cmd_buffer->state.gfx.dirty & ANV_CMD_DIRTY_PIPELINE) || BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_ATTACHMENT_FEEDBACK_LOOP_ENABLE)) { SET_STAGE(PS_EXTRA, ps_extra.PixelShaderKillsPixel, @@ -1034,6 +1021,33 @@ genX(cmd_buffer_flush_gfx_runtime_state)(struct anv_cmd_buffer *cmd_buffer) FRAGMENT); } +#if GFX_VERx10 >= 125 + if (cmd_buffer->state.gfx.dirty & ANV_CMD_DIRTY_PIPELINE) { + SET_STAGE(PS_EXTRA, ps_extra.PixelShaderHasUAV, + wm_prog_data && wm_prog_data->has_side_effects, + FRAGMENT); + } +#else + if (cmd_buffer->state.gfx.dirty & (ANV_CMD_DIRTY_PIPELINE | + ANV_CMD_DIRTY_OCCLUSION_QUERY_ACTIVE)) { + /* Prior to Gfx12.5 the HW seems to avoid spawning fragment shaders even + * if 3DSTATE_PS_EXTRA::PixelShaderKillsPixel=true when + * 3DSTATE_PS_BLEND::HasWriteableRT=false. This is causing problems with + * occlusion queries with 0 attachments. There are no CTS tests + * exercising this but zink+anv fails a bunch of tests like piglit + * arb_framebuffer_no_attachments-query. + * + * Here we choose to tweak the PixelShaderHasUAV to make sure the + * fragment shaders are run properly. + */ + SET_STAGE(PS_EXTRA, ps_extra.PixelShaderHasUAV, + wm_prog_data && (wm_prog_data->has_side_effects || + (gfx->color_att_count == 0 && + gfx->n_occlusion_queries > 0)), + FRAGMENT); + } +#endif + if ((gfx->dirty & ANV_CMD_DIRTY_PIPELINE) || (gfx->dirty & ANV_CMD_DIRTY_RENDER_TARGETS) || BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_CB_LOGIC_OP) || @@ -1759,6 +1773,7 @@ cmd_buffer_gfx_state_emission(struct anv_cmd_buffer *cmd_buffer) if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_PS_EXTRA)) { anv_batch_emit_merge(&cmd_buffer->batch, GENX(3DSTATE_PS_EXTRA), pipeline, partial.ps_extra, pse) { + SET(pse, ps_extra, PixelShaderHasUAV); SET(pse, ps_extra, PixelShaderIsPerSample); #if GFX_VER >= 11 SET(pse, ps_extra, PixelShaderIsPerCoarsePixel); @@ -2136,7 +2151,6 @@ cmd_buffer_gfx_state_emission(struct anv_cmd_buffer *cmd_buffer) if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_WM)) { anv_batch_emit_merge(&cmd_buffer->batch, GENX(3DSTATE_WM), pipeline, partial.wm, wm) { - SET(wm, wm, ForceThreadDispatchEnable); SET(wm, wm, LineStippleEnable); SET(wm, wm, BarycentricInterpolationMode); } diff --git a/src/intel/vulkan/genX_pipeline.c b/src/intel/vulkan/genX_pipeline.c index 33507d1a4d8..c54ced468f4 100644 --- a/src/intel/vulkan/genX_pipeline.c +++ b/src/intel/vulkan/genX_pipeline.c @@ -1642,26 +1642,6 @@ emit_3dstate_wm(struct anv_graphics_pipeline *pipeline, } else { wm.EarlyDepthStencilControl = EDSC_NORMAL; } - - /* Gen8 hardware tries to compute ThreadDispatchEnable for us but - * doesn't take into account KillPixels when no depth or stencil - * writes are enabled. In order for occlusion queries to work - * correctly with no attachments, we need to force-enable PS thread - * dispatch. - * - * The BDW docs are pretty clear that that this bit isn't validated - * and probably shouldn't be used in production: - * - * "This must always be set to Normal. This field should not be - * tested for functional validation." - * - * Unfortunately, however, the other mechanism we have for doing this - * is 3DSTATE_PS_EXTRA::PixelShaderHasUAV which causes hangs on BDW. - * Given two bad options, we choose the one which works. - */ - pipeline->force_fragment_thread_dispatch = - wm_prog_data->has_side_effects || - wm_prog_data->uses_kill; } } }