From 72dc596aa09112892a46693107ea098d5f2d0477 Mon Sep 17 00:00:00 2001
From: Lionel Landwerlin
Date: Wed, 7 Jun 2023 13:31:56 +0300
Subject: [PATCH] anv: implement VK_EXT_attachment_feedback_loop_dynamic_state

Signed-off-by: Lionel Landwerlin
Reviewed-by: Ivan Briano
Part-of:
---
 src/intel/vulkan/anv_cmd_buffer.c |  2 +-
 src/intel/vulkan/anv_device.c     |  4 ++++
 src/intel/vulkan/anv_private.h    | 11 +++++++++--
 src/intel/vulkan/genX_gfx_state.c | 33 +++++++++++++++++++++++++++----
 src/intel/vulkan/genX_pipeline.c  | 25 ++++++-----------------
 5 files changed, 49 insertions(+), 26 deletions(-)

diff --git a/src/intel/vulkan/anv_cmd_buffer.c b/src/intel/vulkan/anv_cmd_buffer.c
index 7ca0eafa406..a033ea09ad0 100644
--- a/src/intel/vulkan/anv_cmd_buffer.c
+++ b/src/intel/vulkan/anv_cmd_buffer.c
@@ -555,7 +555,6 @@ anv_cmd_buffer_flush_pipeline_state(struct anv_cmd_buffer *cmd_buffer,
    diff_fix_state(HS, final.hs);
    diff_fix_state(DS, final.ds);
    diff_fix_state(PS, final.ps);
-   diff_fix_state(PS_EXTRA, final.ps_extra);
 
    diff_fix_state(CLIP, partial.clip);
    diff_fix_state(SF, partial.sf);
@@ -565,6 +564,7 @@ anv_cmd_buffer_flush_pipeline_state(struct anv_cmd_buffer *cmd_buffer,
    diff_fix_state(GS, partial.gs);
    diff_fix_state(TE, partial.te);
    diff_fix_state(VFG, partial.vfg);
+   diff_fix_state(PS_EXTRA, partial.ps_extra);
 
    if (cmd_buffer->device->vk.enabled_extensions.EXT_mesh_shader) {
       diff_fix_state(TASK_CONTROL, final.task_control);
diff --git a/src/intel/vulkan/anv_device.c b/src/intel/vulkan/anv_device.c
index 6f4690130f8..9b6b74027ed 100644
--- a/src/intel/vulkan/anv_device.c
+++ b/src/intel/vulkan/anv_device.c
@@ -322,6 +322,7 @@ get_device_extensions(const struct anv_physical_device *device,
       .KHR_zero_initialize_workgroup_memory = true,
       .EXT_4444_formats = true,
       .EXT_attachment_feedback_loop_layout = true,
+      .EXT_attachment_feedback_loop_dynamic_state = true,
       .EXT_border_color_swizzle = true,
       .EXT_buffer_device_address = true,
       .EXT_calibrated_timestamps = device->has_reg_timestamp,
@@ -884,6 +885,9 @@ get_features(const struct anv_physical_device *pdevice,
 
       /* VK_EXT_attachment_feedback_loop_layout */
       .attachmentFeedbackLoopLayout = true,
+
+      /* VK_EXT_attachment_feedback_loop_dynamic_state */
+      .attachmentFeedbackLoopDynamicState = true,
    };
 
    /* The new DOOM and Wolfenstein games require depthBounds without
diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h
index 15fe1e852c8..d9d17706a73 100644
--- a/src/intel/vulkan/anv_private.h
+++ b/src/intel/vulkan/anv_private.h
@@ -1301,7 +1301,6 @@ enum anv_gfx_state_bits {
    ANV_GFX_STATE_DS,
    ANV_GFX_STATE_GS,
    ANV_GFX_STATE_PS,
-   ANV_GFX_STATE_PS_EXTRA,
    ANV_GFX_STATE_SBE_MESH,
    ANV_GFX_STATE_CLIP_MESH,
    ANV_GFX_STATE_MESH_CONTROL,
@@ -1335,6 +1334,7 @@ enum anv_gfx_state_bits {
    ANV_GFX_STATE_VIEWPORT_SF_CLIP,
    ANV_GFX_STATE_WM,
    ANV_GFX_STATE_WM_DEPTH_STENCIL,
+   ANV_GFX_STATE_PS_EXTRA,
    ANV_GFX_STATE_PMA_FIX, /* Fake state to implement workaround */
    ANV_GFX_STATE_WA_18019816803, /* Fake state to implement workaround */
    ANV_GFX_STATE_TBIMR_TILE_PASS_INFO,
@@ -1423,6 +1423,11 @@ struct anv_gfx_dynamic_state {
       uint32_t LineStippleRepeatCount;
    } ls;
 
+   /* 3DSTATE_PS_EXTRA */
+   struct {
+      bool PixelShaderKillsPixel;
+   } ps_extra;
+
    /* 3DSTATE_PS_BLEND */
    struct {
       bool HasWriteableRT;
@@ -4233,6 +4238,8 @@ struct anv_graphics_pipeline {
    uint32_t view_mask;
    uint32_t instance_multiplier;
 
+   bool rp_has_ds_self_dep;
+
    bool kill_pixel;
    bool force_fragment_thread_dispatch;
    bool uses_xfb;
@@ -4284,7 +4291,6 @@ struct anv_graphics_pipeline {
       struct anv_gfx_state_ptr hs;
       struct anv_gfx_state_ptr ds;
       struct anv_gfx_state_ptr ps;
-      struct anv_gfx_state_ptr ps_extra;
 
       struct anv_gfx_state_ptr task_control;
       struct anv_gfx_state_ptr task_shader;
@@ -4303,6 +4309,7 @@ struct anv_graphics_pipeline {
       struct anv_gfx_state_ptr clip;
       struct anv_gfx_state_ptr sf;
       struct anv_gfx_state_ptr raster;
+      struct anv_gfx_state_ptr ps_extra;
       struct anv_gfx_state_ptr wm;
       struct anv_gfx_state_ptr so;
       struct anv_gfx_state_ptr gs;
diff --git a/src/intel/vulkan/genX_gfx_state.c b/src/intel/vulkan/genX_gfx_state.c
index 52afad44790..3a66f10c26f 100644
--- a/src/intel/vulkan/genX_gfx_state.c
+++ b/src/intel/vulkan/genX_gfx_state.c
@@ -126,8 +126,16 @@ get_cps_state_offset(struct anv_device *device, bool cps_enabled,
 }
 #endif /* GFX_VER >= 12 */
 
+static bool
+has_ds_feedback_loop(const struct vk_dynamic_graphics_state *dyn)
+{
+   return dyn->feedback_loops & (VK_IMAGE_ASPECT_DEPTH_BIT |
+                                 VK_IMAGE_ASPECT_STENCIL_BIT);
+}
+
 UNUSED static bool
 want_stencil_pma_fix(struct anv_cmd_buffer *cmd_buffer,
+                     const struct vk_dynamic_graphics_state *dyn,
                      const struct vk_depth_stencil_state *ds)
 {
    if (GFX_VER > 9)
@@ -240,6 +248,8 @@ want_stencil_pma_fix(struct anv_cmd_buffer *cmd_buffer,
     * (3DSTATE_PS_EXTRA::Pixel Shader Computed Depth mode != PSCDEPTH_OFF)
     */
    return pipeline->kill_pixel ||
+          pipeline->rp_has_ds_self_dep ||
+          has_ds_feedback_loop(dyn) ||
           wm_prog_data->computed_depth_mode != PSCDEPTH_OFF;
 }
 
@@ -792,7 +802,7 @@ genX(cmd_buffer_flush_gfx_runtime_state)(struct anv_cmd_buffer *cmd_buffer)
                 genX(vk_to_intel_compare_op)[opt_ds.stencil.back.op.compare]);
 
 #if GFX_VER == 9
-      const bool pma = want_stencil_pma_fix(cmd_buffer, &opt_ds);
+      const bool pma = want_stencil_pma_fix(cmd_buffer, dyn, &opt_ds);
       SET(PMA_FIX, pma_fix, pma);
 #endif
 
@@ -861,6 +871,17 @@ genX(cmd_buffer_flush_gfx_runtime_state)(struct anv_cmd_buffer *cmd_buffer)
       SET(WM, wm.ForceThreadDispatchEnable, force_thread_dispatch ? ForceON : 0);
    }
 
+   if ((cmd_buffer->state.gfx.dirty & ANV_CMD_DIRTY_PIPELINE) ||
+       BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_ATTACHMENT_FEEDBACK_LOOP_ENABLE)) {
+      const struct brw_wm_prog_data *wm_prog_data = get_wm_prog_data(pipeline);
+
+      SET_STAGE(PS_EXTRA, ps_extra.PixelShaderKillsPixel,
+                wm_prog_data && (pipeline->rp_has_ds_self_dep ||
+                                 has_ds_feedback_loop(dyn) ||
+                                 wm_prog_data->uses_kill),
+                FRAGMENT);
+   }
+
    if ((gfx->dirty & ANV_CMD_DIRTY_PIPELINE) ||
        (gfx->dirty & ANV_CMD_DIRTY_RENDER_TARGETS) ||
        BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_CB_LOGIC_OP) ||
@@ -1462,9 +1483,6 @@ cmd_buffer_gfx_state_emission(struct anv_cmd_buffer *cmd_buffer)
    if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_PS))
       anv_batch_emit_pipeline_state(&cmd_buffer->batch, pipeline, final.ps);
 
-   if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_PS_EXTRA))
-      anv_batch_emit_pipeline_state(&cmd_buffer->batch, pipeline, final.ps_extra);
-
    if (device->vk.enabled_extensions.EXT_mesh_shader) {
       if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_MESH_CONTROL))
          anv_batch_emit_pipeline_state(&cmd_buffer->batch, pipeline, final.mesh_control);
@@ -1507,6 +1525,13 @@ cmd_buffer_gfx_state_emission(struct anv_cmd_buffer *cmd_buffer)
 
    /* Now the potentially dynamic instructions */
 
+   if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_PS_EXTRA)) {
+      anv_batch_emit_merge(&cmd_buffer->batch, GENX(3DSTATE_PS_EXTRA),
+                           pipeline, partial.ps_extra, pse) {
+         SET(pse, ps_extra, PixelShaderKillsPixel);
+      }
+   }
+
    if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_CLIP)) {
       anv_batch_emit_merge(&cmd_buffer->batch, GENX(3DSTATE_CLIP),
                            pipeline, partial.clip, clip) {
diff --git a/src/intel/vulkan/genX_pipeline.c b/src/intel/vulkan/genX_pipeline.c
index a5f4cfa5dcf..9afcddf7568 100644
--- a/src/intel/vulkan/genX_pipeline.c
+++ b/src/intel/vulkan/genX_pipeline.c
@@ -1493,13 +1493,6 @@ emit_3dstate_gs(struct anv_graphics_pipeline *pipeline)
    }
 }
 
-static bool
-state_has_ds_self_dep(const struct vk_graphics_pipeline_state *state)
-{
-   return state->pipeline_flags &
-      VK_PIPELINE_CREATE_2_DEPTH_STENCIL_ATTACHMENT_FEEDBACK_LOOP_BIT_EXT;
-}
-
 static void
 emit_3dstate_wm(struct anv_graphics_pipeline *pipeline,
                 const struct vk_input_assembly_state *ia,
@@ -1641,11 +1634,11 @@ emit_3dstate_ps_extra(struct anv_graphics_pipeline *pipeline,
    const struct brw_wm_prog_data *wm_prog_data = get_wm_prog_data(pipeline);
 
    if (!anv_pipeline_has_stage(pipeline, MESA_SHADER_FRAGMENT)) {
-      anv_pipeline_emit(pipeline, final.ps_extra, GENX(3DSTATE_PS_EXTRA), ps);
+      anv_pipeline_emit(pipeline, partial.ps_extra, GENX(3DSTATE_PS_EXTRA), ps);
       return;
    }
 
-   anv_pipeline_emit(pipeline, final.ps_extra, GENX(3DSTATE_PS_EXTRA), ps) {
+   anv_pipeline_emit(pipeline, partial.ps_extra, GENX(3DSTATE_PS_EXTRA), ps) {
       ps.PixelShaderValid = true;
 #if GFX_VER < 20
       ps.AttributeEnable = wm_prog_data->num_varying_inputs > 0;
@@ -1657,15 +1650,6 @@ emit_3dstate_ps_extra(struct anv_graphics_pipeline *pipeline,
       ps.PixelShaderUsesSourceDepth = wm_prog_data->uses_src_depth;
       ps.PixelShaderUsesSourceW = wm_prog_data->uses_src_w;
 
-      /* If the subpass has a depth or stencil self-dependency, then we need
-       * to force the hardware to do the depth/stencil write *after* fragment
-       * shader execution. Otherwise, the writes may hit memory before we get
-       * around to fetching from the input attachment and we may get the depth
-       * or stencil value from the current draw rather than the previous one.
-       */
-      ps.PixelShaderKillsPixel = state_has_ds_self_dep(state) ||
-                                 wm_prog_data->uses_kill;
-
       ps.PixelShaderComputesStencil = wm_prog_data->computed_stencil;
 #if GFX_VER >= 20
       assert(!wm_prog_data->pulls_bary);
@@ -1735,8 +1719,11 @@ compute_kill_pixel(struct anv_graphics_pipeline *pipeline,
     * 3DSTATE_PS_BLEND::AlphaTestEnable since Vulkan doesn't have a concept
     * of an alpha test.
     */
+   pipeline->rp_has_ds_self_dep =
+      (state->pipeline_flags &
+       VK_PIPELINE_CREATE_2_DEPTH_STENCIL_ATTACHMENT_FEEDBACK_LOOP_BIT_EXT) != 0;
    pipeline->kill_pixel =
-      state_has_ds_self_dep(state) ||
+      pipeline->rp_has_ds_self_dep ||
       wm_prog_data->uses_kill ||
       wm_prog_data->uses_omask ||
       (ms && ms->alpha_to_coverage_enable);