diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c
index 4466955a37e..58f2a51d061 100644
--- a/src/amd/vulkan/radv_cmd_buffer.c
+++ b/src/amd/vulkan/radv_cmd_buffer.c
@@ -238,6 +238,8 @@ radv_bind_dynamic_state(struct radv_cmd_buffer *cmd_buffer, const struct radv_dy
    RADV_CMP_COPY(vk.dr.enable, RADV_DYNAMIC_DISCARD_RECTANGLE_ENABLE);
    RADV_CMP_COPY(vk.dr.mode, RADV_DYNAMIC_DISCARD_RECTANGLE_MODE);
 
+   RADV_CMP_COPY(feedback_loop_aspects, RADV_DYNAMIC_ATTACHMENT_FEEDBACK_LOOP_ENABLE);
+
 #undef RADV_CMP_COPY
 
    cmd_buffer->state.dirty |= dest_mask;
@@ -1902,6 +1904,10 @@ radv_emit_graphics_pipeline(struct radv_cmd_buffer *cmd_buffer)
           cmd_buffer->state.emitted_graphics_pipeline->rast_prim != pipeline->rast_prim)
          cmd_buffer->state.dirty |= RADV_CMD_DIRTY_DYNAMIC_RASTERIZATION_SAMPLES;
+
+      if (cmd_buffer->state.emitted_graphics_pipeline->db_shader_control !=
+          pipeline->db_shader_control)
+         cmd_buffer->state.dirty |= RADV_CMD_DIRTY_DYNAMIC_ATTACHMENT_FEEDBACK_LOOP_ENABLE;
    }
 
    radeon_emit_array(cmd_buffer->cs, pipeline->base.cs.buf, pipeline->base.cs.cdw);
 
@@ -4449,6 +4455,31 @@ radv_emit_line_rasterization_mode(struct radv_cmd_buffer *cmd_buffer)
                              d->vk.rs.line.mode == VK_LINE_RASTERIZATION_MODE_RECTANGULAR_EXT));
 }
 
+static void
+radv_emit_attachment_feedback_loop_enable(struct radv_cmd_buffer *cmd_buffer)
+{
+   const struct radv_graphics_pipeline *pipeline = cmd_buffer->state.graphics_pipeline;
+   const struct radv_shader *ps = cmd_buffer->state.shaders[MESA_SHADER_FRAGMENT];
+   const struct radv_dynamic_state *d = &cmd_buffer->state.dynamic;
+   unsigned db_shader_control = pipeline->db_shader_control;
+   const bool uses_ds_feedback_loop =
+      !!(d->feedback_loop_aspects & (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT));
+   unsigned z_order;
+
+   /* Z_ORDER depends on the dynamic feedback loop aspects, so DB_SHADER_CONTROL is emitted here:
+    * a depth/stencil feedback loop needs LATE_Z so shader invocations read the correct value.
+    */
+   if (!uses_ds_feedback_loop && (ps->info.ps.early_fragment_test || !ps->info.ps.writes_memory)) {
+      z_order = V_02880C_EARLY_Z_THEN_LATE_Z;
+   } else {
+      z_order = V_02880C_LATE_Z;
+   }
+
+   db_shader_control |= S_02880C_Z_ORDER(z_order);
+
+   radeon_set_context_reg(cmd_buffer->cs, R_02880C_DB_SHADER_CONTROL, db_shader_control);
+}
+
 static void
 radv_cmd_buffer_flush_dynamic_state(struct radv_cmd_buffer *cmd_buffer, bool pipeline_is_dirty)
 {
@@ -4582,6 +4613,9 @@ radv_cmd_buffer_flush_dynamic_state(struct radv_cmd_buffer *cmd_buffer, bool pip
                  RADV_CMD_DIRTY_DYNAMIC_LINE_RASTERIZATION_MODE))
       radv_emit_msaa_state(cmd_buffer);
 
+   if (states & RADV_CMD_DIRTY_DYNAMIC_ATTACHMENT_FEEDBACK_LOOP_ENABLE)
+      radv_emit_attachment_feedback_loop_enable(cmd_buffer);
+
    cmd_buffer->state.dirty &= ~states;
 }
 
@@ -7524,6 +7558,18 @@ radv_CmdSetDiscardRectangleModeEXT(VkCommandBuffer commandBuffer,
    state->dirty |= RADV_CMD_DIRTY_DYNAMIC_DISCARD_RECTANGLE_MODE;
 }
 
+VKAPI_ATTR void VKAPI_CALL
+radv_CmdSetAttachmentFeedbackLoopEnableEXT(VkCommandBuffer commandBuffer,
+                                           VkImageAspectFlags aspectMask)
+{
+   RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
+   struct radv_cmd_state *state = &cmd_buffer->state;
+
+   state->dynamic.feedback_loop_aspects = aspectMask;
+
+   state->dirty |= RADV_CMD_DIRTY_DYNAMIC_ATTACHMENT_FEEDBACK_LOOP_ENABLE;
+}
+
 VKAPI_ATTR void VKAPI_CALL
 radv_CmdExecuteCommands(VkCommandBuffer commandBuffer, uint32_t commandBufferCount,
                         const VkCommandBuffer *pCmdBuffers)
diff --git a/src/amd/vulkan/radv_pipeline_graphics.c b/src/amd/vulkan/radv_pipeline_graphics.c
index 230c1cabf36..d6a99b2686b 100644
--- a/src/amd/vulkan/radv_pipeline_graphics.c
+++ b/src/amd/vulkan/radv_pipeline_graphics.c
@@ -58,10 +58,6 @@ struct radv_blend_state {
    uint32_t cb_shader_mask;
 };
 
-struct radv_depth_stencil_state {
-   uint32_t db_shader_control;
-};
-
 static bool
 radv_is_static_vrs_enabled(const struct radv_graphics_pipeline *pipeline,
                            const struct vk_graphics_pipeline_state *state)
@@ -540,6 +536,8 @@ radv_dynamic_state_mask(VkDynamicState state)
       return RADV_DYNAMIC_DISCARD_RECTANGLE_ENABLE;
    case VK_DYNAMIC_STATE_DISCARD_RECTANGLE_MODE_EXT:
       return RADV_DYNAMIC_DISCARD_RECTANGLE_MODE;
+   case VK_DYNAMIC_STATE_ATTACHMENT_FEEDBACK_LOOP_ENABLE_EXT:
+      return RADV_DYNAMIC_ATTACHMENT_FEEDBACK_LOOP_ENABLE;
    default:
       unreachable("Unhandled dynamic state");
    }
@@ -894,9 +892,32 @@ radv_pipeline_init_input_assembly_state(const struct radv_device *device,
    pipeline->ia_multi_vgt_param = radv_compute_ia_multi_vgt_param_helpers(device, pipeline);
 }
 
+static bool
+radv_pipeline_uses_ds_feedback_loop(const VkGraphicsPipelineCreateInfo *pCreateInfo,
+                                    const struct vk_graphics_pipeline_state *state)
+{
+   VK_FROM_HANDLE(vk_render_pass, render_pass, state->rp->render_pass);
+
+   if (render_pass) {
+      uint32_t subpass_idx = state->rp->subpass;
+      struct vk_subpass *subpass = &render_pass->subpasses[subpass_idx];
+      struct vk_subpass_attachment *ds_att = subpass->depth_stencil_attachment;
+
+      for (uint32_t i = 0; i < subpass->input_count; i++) {
+         if (ds_att && ds_att->attachment == subpass->input_attachments[i].attachment) {
+            return true;
+         }
+      }
+   }
+
+   return (pCreateInfo->flags &
+           VK_PIPELINE_CREATE_DEPTH_STENCIL_ATTACHMENT_FEEDBACK_LOOP_BIT_EXT) != 0;
+}
+
 static void
 radv_pipeline_init_dynamic_state(struct radv_graphics_pipeline *pipeline,
-                                 const struct vk_graphics_pipeline_state *state)
+                                 const struct vk_graphics_pipeline_state *state,
+                                 const VkGraphicsPipelineCreateInfo *pCreateInfo)
 {
    uint64_t needed_states = radv_pipeline_needed_dynamic_state(pipeline, state);
    struct radv_dynamic_state *dynamic = &pipeline->dynamic_state;
@@ -1180,50 +1201,25 @@ radv_pipeline_init_dynamic_state(struct radv_graphics_pipeline *pipeline,
       dynamic->vk.dr.mode = state->dr->mode;
    }
 
-   pipeline->dynamic_state.mask = states;
-}
+   if (states & RADV_DYNAMIC_ATTACHMENT_FEEDBACK_LOOP_ENABLE) {
+      bool uses_ds_feedback_loop = radv_pipeline_uses_ds_feedback_loop(pCreateInfo, state);
 
-static bool
-radv_pipeline_uses_ds_feedback_loop(const VkGraphicsPipelineCreateInfo *pCreateInfo,
-                                    const struct vk_graphics_pipeline_state *state)
-{
-   VK_FROM_HANDLE(vk_render_pass, render_pass, state->rp->render_pass);
-
-   if (render_pass) {
-      uint32_t subpass_idx = state->rp->subpass;
-      struct vk_subpass *subpass = &render_pass->subpasses[subpass_idx];
-      struct vk_subpass_attachment *ds_att = subpass->depth_stencil_attachment;
-
-      for (uint32_t i = 0; i < subpass->input_count; i++) {
-         if (ds_att && ds_att->attachment == subpass->input_attachments[i].attachment) {
-            return true;
-         }
-      }
+      dynamic->feedback_loop_aspects =
+         uses_ds_feedback_loop ? (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)
+                               : VK_IMAGE_ASPECT_NONE;
    }
 
-   return (pCreateInfo->flags &
-           VK_PIPELINE_CREATE_DEPTH_STENCIL_ATTACHMENT_FEEDBACK_LOOP_BIT_EXT) != 0;
+   pipeline->dynamic_state.mask = states;
 }
 
 static uint32_t
 radv_compute_db_shader_control(const struct radv_device *device,
                                const struct radv_graphics_pipeline *pipeline,
-                               const struct vk_graphics_pipeline_state *state,
-                               const VkGraphicsPipelineCreateInfo *pCreateInfo)
+                               const struct vk_graphics_pipeline_state *state)
 {
    const struct radv_physical_device *pdevice = device->physical_device;
-   bool uses_ds_feedback_loop = radv_pipeline_uses_ds_feedback_loop(pCreateInfo, state);
    struct radv_shader *ps = pipeline->base.shaders[MESA_SHADER_FRAGMENT];
    unsigned conservative_z_export = V_02880C_EXPORT_ANY_Z;
-   unsigned z_order;
-
-   /* When a depth/stencil attachment is used inside feedback loops, use LATE_Z to make sure shader
-    * invocations read the correct value.
-    */
-   if (!uses_ds_feedback_loop && (ps->info.ps.early_fragment_test || !ps->info.ps.writes_memory))
-      z_order = V_02880C_EARLY_Z_THEN_LATE_Z;
-   else
-      z_order = V_02880C_LATE_Z;
 
    if (ps->info.ps.depth_layout == FRAG_DEPTH_LAYOUT_GREATER)
       conservative_z_export = V_02880C_EXPORT_GREATER_THAN_Z;
@@ -1247,7 +1243,7 @@ radv_compute_db_shader_control(const struct radv_device *device,
           S_02880C_STENCIL_TEST_VAL_EXPORT_ENABLE(ps->info.ps.writes_stencil) |
           S_02880C_KILL_ENABLE(!!ps->info.ps.can_discard) |
           S_02880C_MASK_EXPORT_ENABLE(mask_export_enable) |
-          S_02880C_CONSERVATIVE_Z_EXPORT(conservative_z_export) | S_02880C_Z_ORDER(z_order) |
+          S_02880C_CONSERVATIVE_Z_EXPORT(conservative_z_export) |
           S_02880C_DEPTH_BEFORE_SHADER(ps->info.ps.early_fragment_test) |
           S_02880C_PRE_SHADER_DEPTH_COVERAGE_ENABLE(ps->info.ps.post_depth_coverage) |
           S_02880C_EXEC_ON_HIER_FAIL(ps->info.ps.writes_memory) |
@@ -1257,20 +1253,6 @@ radv_compute_db_shader_control(const struct radv_device *device,
           S_02880C_OVERRIDE_INTRINSIC_RATE(export_conflict_wa ? 2 : 0);
 }
 
-static struct radv_depth_stencil_state
-radv_pipeline_init_depth_stencil_state(const struct radv_device *device,
-                                       struct radv_graphics_pipeline *pipeline,
-                                       const struct vk_graphics_pipeline_state *state,
-                                       const VkGraphicsPipelineCreateInfo *pCreateInfo)
-{
-   struct radv_depth_stencil_state ds_state = {0};
-
-   ds_state.db_shader_control =
-      radv_compute_db_shader_control(device, pipeline, state, pCreateInfo);
-
-   return ds_state;
-}
-
 static void
 gfx10_emit_ge_pc_alloc(struct radeon_cmdbuf *cs, enum amd_gfx_level gfx_level,
                        uint32_t oversub_pc_lines)
@@ -2898,13 +2880,6 @@ done:
    return result;
 }
 
-static void
-radv_pipeline_emit_depth_stencil_state(struct radeon_cmdbuf *ctx_cs,
-                                       const struct radv_depth_stencil_state *ds_state)
-{
-   radeon_set_context_reg(ctx_cs, R_02880C_DB_SHADER_CONTROL, ds_state->db_shader_control);
-}
-
 static void
 radv_pipeline_emit_blend_state(struct radeon_cmdbuf *ctx_cs,
                                const struct radv_graphics_pipeline *pipeline,
@@ -3738,9 +3713,7 @@ gfx103_pipeline_emit_vrs_state(const struct radv_device *device, struct radeon_c
 
 static void
 radv_pipeline_emit_pm4(const struct radv_device *device, struct radv_graphics_pipeline *pipeline,
-                       const struct radv_blend_state *blend,
-                       const struct radv_depth_stencil_state *ds_state,
-                       uint32_t vgt_gs_out_prim_type,
+                       const struct radv_blend_state *blend, uint32_t vgt_gs_out_prim_type,
                        const struct vk_graphics_pipeline_state *state)
 
 {
@@ -3755,7 +3728,6 @@ radv_pipeline_emit_pm4(const struct radv_device *device, struct radv_graphics_pi
    cs->buf = malloc(4 * (cs->max_dw + ctx_cs->max_dw));
    ctx_cs->buf = cs->buf + cs->max_dw;
 
-   radv_pipeline_emit_depth_stencil_state(ctx_cs, ds_state);
    radv_pipeline_emit_blend_state(ctx_cs, pipeline, blend);
    radv_pipeline_emit_vgt_gs_mode(device, ctx_cs, pipeline);
 
@@ -4087,10 +4059,7 @@ radv_graphics_pipeline_init(struct radv_graphics_pipeline *pipeline, struct radv
 
    if (!radv_pipeline_has_stage(pipeline, MESA_SHADER_MESH))
       radv_pipeline_init_input_assembly_state(device, pipeline);
-   radv_pipeline_init_dynamic_state(pipeline, &state);
-
-   struct radv_depth_stencil_state ds_state =
-      radv_pipeline_init_depth_stencil_state(device, pipeline, &state, pCreateInfo);
+   radv_pipeline_init_dynamic_state(pipeline, &state, pCreateInfo);
 
    if (device->physical_device->rad_info.gfx_level >= GFX10_3)
       gfx103_pipeline_init_vrs_state(pipeline, &state);
@@ -4162,6 +4131,7 @@ radv_graphics_pipeline_init(struct radv_graphics_pipeline *pipeline, struct radv
 
    pipeline->base.push_constant_size = pipeline_layout.push_constant_size;
    pipeline->base.dynamic_offset_count = pipeline_layout.dynamic_offset_count;
+   pipeline->db_shader_control = radv_compute_db_shader_control(device, pipeline, &state);
 
    for (unsigned i = 0; i < MESA_VULKAN_SHADER_STAGES; i++) {
       if (pipeline->base.shaders[i]) {
@@ -4179,7 +4149,7 @@ radv_graphics_pipeline_init(struct radv_graphics_pipeline *pipeline, struct radv
       radv_pipeline_init_extra(pipeline, extra, &blend, &state, &vgt_gs_out_prim_type);
    }
 
-   radv_pipeline_emit_pm4(device, pipeline, &blend, &ds_state, vgt_gs_out_prim_type, &state);
+   radv_pipeline_emit_pm4(device, pipeline, &blend, vgt_gs_out_prim_type, &state);
 
    radv_pipeline_layout_finish(device, &pipeline_layout);
    return result;
diff --git a/src/amd/vulkan/radv_private.h b/src/amd/vulkan/radv_private.h
index c12786da17a..1a4268383a1 100644
--- a/src/amd/vulkan/radv_private.h
+++ b/src/amd/vulkan/radv_private.h
@@ -1278,7 +1278,8 @@ enum radv_dynamic_state_bits {
    RADV_DYNAMIC_COLOR_BLEND_EQUATION = 1ull << 45,
    RADV_DYNAMIC_DISCARD_RECTANGLE_ENABLE = 1ull << 46,
    RADV_DYNAMIC_DISCARD_RECTANGLE_MODE = 1ull << 47,
-   RADV_DYNAMIC_ALL = (1ull << 48) - 1,
+   RADV_DYNAMIC_ATTACHMENT_FEEDBACK_LOOP_ENABLE = 1ull << 48,
+   RADV_DYNAMIC_ALL = (1ull << 49) - 1,
 };
 
 enum radv_cmd_dirty_bits {
@@ -1332,16 +1333,17 @@ enum radv_cmd_dirty_bits {
    RADV_CMD_DIRTY_DYNAMIC_COLOR_BLEND_EQUATION = 1ull << 45,
    RADV_CMD_DIRTY_DYNAMIC_DISCARD_RECTANGLE_ENABLE = 1ull << 46,
    RADV_CMD_DIRTY_DYNAMIC_DISCARD_RECTANGLE_MODE = 1ull << 47,
-   RADV_CMD_DIRTY_DYNAMIC_ALL = (1ull << 48) - 1,
-   RADV_CMD_DIRTY_PIPELINE = 1ull << 48,
-   RADV_CMD_DIRTY_INDEX_BUFFER = 1ull << 49,
-   RADV_CMD_DIRTY_FRAMEBUFFER = 1ull << 50,
-   RADV_CMD_DIRTY_VERTEX_BUFFER = 1ull << 51,
-   RADV_CMD_DIRTY_STREAMOUT_BUFFER = 1ull << 52,
-   RADV_CMD_DIRTY_GUARDBAND = 1ull << 53,
-   RADV_CMD_DIRTY_RBPLUS = 1ull << 54,
-   RADV_CMD_DIRTY_NGG_QUERY = 1ull << 55,
-   RADV_CMD_DIRTY_OCCLUSION_QUERY = 1ull << 56,
+   RADV_CMD_DIRTY_DYNAMIC_ATTACHMENT_FEEDBACK_LOOP_ENABLE = 1ull << 48,
+   RADV_CMD_DIRTY_DYNAMIC_ALL = (1ull << 49) - 1,
+   RADV_CMD_DIRTY_PIPELINE = 1ull << 49,
+   RADV_CMD_DIRTY_INDEX_BUFFER = 1ull << 50,
+   RADV_CMD_DIRTY_FRAMEBUFFER = 1ull << 51,
+   RADV_CMD_DIRTY_VERTEX_BUFFER = 1ull << 52,
+   RADV_CMD_DIRTY_STREAMOUT_BUFFER = 1ull << 53,
+   RADV_CMD_DIRTY_GUARDBAND = 1ull << 54,
+   RADV_CMD_DIRTY_RBPLUS = 1ull << 55,
+   RADV_CMD_DIRTY_NGG_QUERY = 1ull << 56,
+   RADV_CMD_DIRTY_OCCLUSION_QUERY = 1ull << 57,
 };
 
 enum radv_cmd_flush_bits {
@@ -1447,6 +1449,8 @@ struct radv_dynamic_state {
    } hw_vp;
 
    struct radv_sample_locations_state sample_location;
+
+   VkImageAspectFlags feedback_loop_aspects; /* Aspects with attachment feedback loops enabled. */
 };
 
 const char *radv_get_debug_option_name(int id);
@@ -2270,6 +2274,7 @@ struct radv_graphics_pipeline {
    uint32_t attrib_ends[MAX_VERTEX_ATTRIBS];
    uint32_t attrib_index_offset[MAX_VERTEX_ATTRIBS];
    uint32_t db_render_control;
+   uint32_t db_shader_control; /* Without Z_ORDER, which is set when dynamic state is emitted. */
 
    /* Last pre-PS API stage */
    gl_shader_stage last_vgt_api_stage;