From adf18761f89a39ee4ada6ffb3ca4c7eedb6b67ae Mon Sep 17 00:00:00 2001 From: Lionel Landwerlin Date: Mon, 5 Jan 2026 15:38:56 +0200 Subject: [PATCH] anv: rework color_aux operation tracking The current tracking seems to have latent issues related to MCS ambiguate that are currently masked by the pb-stall+RT-flush we insert on BTI changes, which we're going to remove in the next commits. The issues appear to be related to a missing pb-stall+RT-flush between MCS ambiguate and fast-clear, causing failures on the following tests once BTP+BTI RCC caching is enabled: dEQP-VK.pipeline.*.multisample.misc.*multi* dEQP-VK.pipeline.*.framebuffer_attachment.diff_attachments_2d_32x32_39x41_ms dEQP-VK.pipeline.*.framebuffer_attachment.diff_attachments_2d_32x32_48x48_ms Here we rework the tracking with a new enum to track 3 classes of operations. Signed-off-by: Lionel Landwerlin Reviewed-by: Nanley Chery Part-of: --- src/intel/vulkan/anv_genX.h | 2 +- src/intel/vulkan/anv_private.h | 17 ++++++- src/intel/vulkan/genX_blorp_exec.c | 32 +++++++------ src/intel/vulkan/genX_cmd_buffer.c | 72 ++++++++++++----------------- src/intel/vulkan/genX_cmd_compute.c | 4 +- src/intel/vulkan/genX_cmd_draw.c | 2 +- 6 files changed, 68 insertions(+), 61 deletions(-) diff --git a/src/intel/vulkan/anv_genX.h b/src/intel/vulkan/anv_genX.h index 6829fe7a821..920d4799f5b 100644 --- a/src/intel/vulkan/anv_genX.h +++ b/src/intel/vulkan/anv_genX.h @@ -95,7 +95,7 @@ void genX(batch_emit_push_constants)(struct anv_batch *batch, void genX(cmd_buffer_update_color_aux_op)(struct anv_cmd_buffer *cmd_buffer, - enum isl_aux_op aux_op); + enum anv_color_aux_op_class aux_op); void genX(cmd_buffer_emit_gfx12_depth_wa)(struct anv_cmd_buffer *cmd_buffer, const struct isl_surf *surf); diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h index 0edd7a204c1..ad9fac02991 100644 --- a/src/intel/vulkan/anv_private.h +++ b/src/intel/vulkan/anv_private.h @@ -4741,6 +4741,21 @@ enum 
anv_cmd_descriptor_buffer_mode { ANV_CMD_DESCRIPTOR_BUFFER_MODE_BUFFER, }; +enum anv_color_aux_op_class { + /* Non color related operation class or rendering */ + ANV_COLOR_AUX_OP_CLASS_NONE, + /* Software managed ambiguate operation class (MCS & CCS-pre-gfx11) */ + ANV_COLOR_AUX_OP_CLASS_SW_AMBIGUATE, + /* Hardware managed ambiguate operation class (CCS gfx11+) */ + ANV_COLOR_AUX_OP_CLASS_HW_AMBIGUATE, + /* Fast clear (includes CCS ambiguate) */ + ANV_COLOR_AUX_OP_CLASS_FAST_CLEAR, + /* Resolves HW managed */ + ANV_COLOR_AUX_OP_CLASS_HW_RESOLVE, + /* Resolves SW managed */ + ANV_COLOR_AUX_OP_CLASS_SW_RESOLVE, +}; + /** State required while building cmd buffer */ struct anv_cmd_state { /* PIPELINE_SELECT.PipelineSelection */ @@ -4836,7 +4851,7 @@ struct anv_cmd_state { /* The last auxiliary surface operation (or equivalent operation) provided * to genX(cmd_buffer_update_color_aux_op). */ - enum isl_aux_op color_aux_op; + enum anv_color_aux_op_class color_aux_op; /** * Whether RHWO optimization is enabled (Wa_1508744258 and Wa_14024015672). diff --git a/src/intel/vulkan/genX_blorp_exec.c b/src/intel/vulkan/genX_blorp_exec.c index c55cac45059..c7e15324ec1 100644 --- a/src/intel/vulkan/genX_blorp_exec.c +++ b/src/intel/vulkan/genX_blorp_exec.c @@ -491,32 +491,36 @@ blorp_exec_on_blitter(struct blorp_batch *batch, blorp_exec(batch, params); } -static enum isl_aux_op +static enum anv_color_aux_op_class get_color_aux_op(const struct blorp_params *params) { switch (params->op) { - case BLORP_OP_CCS_RESOLVE: - case BLORP_OP_CCS_PARTIAL_RESOLVE: case BLORP_OP_CCS_COLOR_CLEAR: case BLORP_OP_MCS_COLOR_CLEAR: assert(params->fast_clear_op != ISL_AUX_OP_NONE); - return params->fast_clear_op; + return ANV_COLOR_AUX_OP_CLASS_FAST_CLEAR; /* Some auxiliary surface operations are not provided by hardware. To * provide that functionality, BLORP sometimes tries to emulate what - * hardware would do with custom pixel shaders. 
For now, we assume that - * BLORP's implementation has the same cache invalidation and flushing - * requirements as similar hardware operations. + * hardware would do with custom pixel shaders. */ case BLORP_OP_CCS_AMBIGUATE: - assert(GFX_VER >= 11 || params->fast_clear_op == ISL_AUX_OP_NONE); - return ISL_AUX_OP_AMBIGUATE; + if (params->fast_clear_op == ISL_AUX_OP_NONE) { + return ANV_COLOR_AUX_OP_CLASS_SW_AMBIGUATE; + } else { + assert(GFX_VER >= 11); + return ANV_COLOR_AUX_OP_CLASS_HW_AMBIGUATE; + } case BLORP_OP_MCS_AMBIGUATE: assert(params->fast_clear_op == ISL_AUX_OP_NONE); - return ISL_AUX_OP_AMBIGUATE; + return ANV_COLOR_AUX_OP_CLASS_SW_AMBIGUATE; + case BLORP_OP_CCS_RESOLVE: + case BLORP_OP_CCS_PARTIAL_RESOLVE: + assert(params->fast_clear_op != ISL_AUX_OP_NONE); + return ANV_COLOR_AUX_OP_CLASS_HW_RESOLVE; case BLORP_OP_MCS_PARTIAL_RESOLVE: assert(params->fast_clear_op == ISL_AUX_OP_NONE); - return ISL_AUX_OP_PARTIAL_RESOLVE; + return ANV_COLOR_AUX_OP_CLASS_SW_RESOLVE; /* If memory aliasing is being done on an image, a pending fast clear * could hit the destination address at an unknown time. Go back to the @@ -532,7 +536,7 @@ get_color_aux_op(const struct blorp_params *params) case BLORP_OP_SLOW_DEPTH_STENCIL_CLEAR: case BLORP_OP_SLOW_DEPTH_CLEAR: assert(params->fast_clear_op == ISL_AUX_OP_NONE); - return ISL_AUX_OP_NONE; + return ANV_COLOR_AUX_OP_CLASS_NONE; /* The remaining operations are considered regular draws. */ case BLORP_OP_LINEAR_SURFACE_CLEAR: @@ -540,7 +544,7 @@ get_color_aux_op(const struct blorp_params *params) case BLORP_OP_BLIT: case BLORP_OP_COPY: assert(params->fast_clear_op == ISL_AUX_OP_NONE); - return ISL_AUX_OP_NONE; + return ANV_COLOR_AUX_OP_CLASS_NONE; } UNREACHABLE("Invalid value in params->op"); @@ -563,7 +567,7 @@ genX(blorp_exec)(struct blorp_batch *batch, } /* Flush any in-progress CCS/MCS operations as needed. 
*/ - const enum isl_aux_op aux_op = get_color_aux_op(params); + const enum anv_color_aux_op_class aux_op = get_color_aux_op(params); genX(cmd_buffer_update_color_aux_op(cmd_buffer, aux_op)); if (batch->flags & BLORP_BATCH_USE_BLITTER) diff --git a/src/intel/vulkan/genX_cmd_buffer.c b/src/intel/vulkan/genX_cmd_buffer.c index 182a540aeea..33170daa2f2 100644 --- a/src/intel/vulkan/genX_cmd_buffer.c +++ b/src/intel/vulkan/genX_cmd_buffer.c @@ -3464,50 +3464,38 @@ genX(cmd_buffer_begin_companion)(struct anv_cmd_buffer *cmd_buffer, } static bool -aux_op_resolves(enum isl_aux_op aux_op) +is_hw_managed_fast_clear(enum anv_color_aux_op_class op) { - return aux_op == ISL_AUX_OP_FULL_RESOLVE || - aux_op == ISL_AUX_OP_PARTIAL_RESOLVE; -} - -static bool -aux_op_clears(enum isl_aux_op aux_op) -{ - return aux_op == ISL_AUX_OP_FAST_CLEAR || - aux_op == ISL_AUX_OP_AMBIGUATE; -} - -static bool -aux_op_renders(enum isl_aux_op aux_op) -{ - return aux_op == ISL_AUX_OP_NONE; + return op == ANV_COLOR_AUX_OP_CLASS_FAST_CLEAR || + op == ANV_COLOR_AUX_OP_CLASS_HW_AMBIGUATE; } static void add_pending_pipe_bits_for_color_aux_op(struct anv_cmd_buffer *cmd_buffer, - enum isl_aux_op next_aux_op, + enum anv_color_aux_op_class next_aux_op, enum anv_pipe_bits pipe_bits, const char *reason) { - const enum isl_aux_op last_aux_op = cmd_buffer->state.color_aux_op; + const enum anv_color_aux_op_class last_aux_op = cmd_buffer->state.color_aux_op; assert(next_aux_op != last_aux_op); anv_add_pending_pipe_bits(cmd_buffer, - aux_op_clears(next_aux_op) ? + is_hw_managed_fast_clear(next_aux_op) ? VK_PIPELINE_STAGE_2_NONE : VK_PIPELINE_STAGE_2_COLOR_ATTACHMENT_OUTPUT_BIT, - aux_op_clears(next_aux_op) ? + is_hw_managed_fast_clear(next_aux_op) ? 
VK_PIPELINE_STAGE_2_FRAGMENT_SHADER_BIT : 0, pipe_bits, reason); } void genX(cmd_buffer_update_color_aux_op)(struct anv_cmd_buffer *cmd_buffer, - enum isl_aux_op next_aux_op) + enum anv_color_aux_op_class next_aux_op) { - const enum isl_aux_op last_aux_op = cmd_buffer->state.color_aux_op; + const enum anv_color_aux_op_class last_aux_op = cmd_buffer->state.color_aux_op; - if (!aux_op_clears(last_aux_op) && aux_op_clears(next_aux_op)) { + if (!is_hw_managed_fast_clear(last_aux_op) && + is_hw_managed_fast_clear(next_aux_op)) { #if GFX_VER >= 20 /* From the Xe2 Bspec 57340 (r59562), * "MCS/CCS Buffers, Fast Clear for Render Target(s)": @@ -3529,7 +3517,7 @@ genX(cmd_buffer_update_color_aux_op)(struct anv_cmd_buffer *cmd_buffer, */ add_pending_pipe_bits_for_color_aux_op( cmd_buffer, next_aux_op, ANV_PIPE_RT_BTI_CHANGE, - "aux color !aux->aux"); + "aux color !fast-clear->fast-clear"); #elif GFX_VERx10 == 125 /* From the ACM Bspec 47704 (r52663), "Render Target Fast Clear": @@ -3556,7 +3544,7 @@ genX(cmd_buffer_update_color_aux_op)(struct anv_cmd_buffer *cmd_buffer, ANV_PIPE_HDC_PIPELINE_FLUSH_BIT | ANV_PIPE_DATA_CACHE_FLUSH_BIT | ANV_PIPE_TEXTURE_CACHE_INVALIDATE_BIT, - "aux color !aux->aux"); + "aux color !fast-clear->fast-clear"); #elif GFX_VERx10 == 120 /* From the TGL Bspec 47704 (r52663), "Render Target Fast Clear": @@ -3580,7 +3568,7 @@ genX(cmd_buffer_update_color_aux_op)(struct anv_cmd_buffer *cmd_buffer, ANV_PIPE_TILE_CACHE_FLUSH_BIT | ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT | ANV_PIPE_TEXTURE_CACHE_INVALIDATE_BIT, - "aux color !aux->aux"); + "aux color !fast-clear->fast-clear"); #else /* From the Sky Lake PRM Vol. 
7, "MCS Buffer for Render Target(s)": @@ -3609,9 +3597,10 @@ genX(cmd_buffer_update_color_aux_op)(struct anv_cmd_buffer *cmd_buffer, cmd_buffer, next_aux_op, ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT | ANV_PIPE_END_OF_PIPE_SYNC_BIT, - "aux color !aux->aux"); + "aux color !fast-clear->fast-clear"); #endif - } else if (aux_op_clears(last_aux_op) && !aux_op_clears(next_aux_op)) { + } else if (is_hw_managed_fast_clear(last_aux_op) && + !is_hw_managed_fast_clear(next_aux_op)) { #if GFX_VERx10 >= 125 /* From the ACM PRM Vol. 9, "Color Fast Clear Synchronization": * @@ -3623,7 +3612,7 @@ genX(cmd_buffer_update_color_aux_op)(struct anv_cmd_buffer *cmd_buffer, */ add_pending_pipe_bits_for_color_aux_op( cmd_buffer, next_aux_op, ANV_PIPE_RT_BTI_CHANGE, - "aux color aux->!aux"); + "aux color fast-clear->!fast-clear"); #elif GFX_VERx10 == 120 /* From the TGL PRM Vol. 9, "Color Fast Clear Synchronization": @@ -3648,7 +3637,7 @@ genX(cmd_buffer_update_color_aux_op)(struct anv_cmd_buffer *cmd_buffer, ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT | ANV_PIPE_L3_FABRIC_FLUSH_BIT | ANV_PIPE_DEPTH_STALL_BIT, - "aux color aux->!aux"); + "aux color fast-clear->!fast-clear"); #else /* From the Sky Lake PRM Vol. 7, "Render Target Fast Clear": @@ -3667,11 +3656,11 @@ genX(cmd_buffer_update_color_aux_op)(struct anv_cmd_buffer *cmd_buffer, cmd_buffer, next_aux_op, ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT | ANV_PIPE_END_OF_PIPE_SYNC_BIT, - "aux color aux->!aux"); + "aux color fast-clear->!fast-clear"); #endif - - } else if (aux_op_renders(last_aux_op) != aux_op_renders(next_aux_op)) { - assert(aux_op_resolves(last_aux_op) != aux_op_resolves(next_aux_op)); + } else if (last_aux_op != next_aux_op && + !is_hw_managed_fast_clear(last_aux_op) && + !is_hw_managed_fast_clear(next_aux_op)) { /* From the Sky Lake PRM Vol. 
7, "MCS Buffer for Render Target(s)": * * Any transition from any value in {Clear, Render, Resolve} to a @@ -3691,11 +3680,11 @@ genX(cmd_buffer_update_color_aux_op)(struct anv_cmd_buffer *cmd_buffer, cmd_buffer, next_aux_op, ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT | ANV_PIPE_END_OF_PIPE_SYNC_BIT, - "aux color render->!render"); + "aux color change (non fast-clear)"); } - if (last_aux_op != ISL_AUX_OP_FAST_CLEAR && - next_aux_op == ISL_AUX_OP_FAST_CLEAR && + if (last_aux_op != ANV_COLOR_AUX_OP_CLASS_FAST_CLEAR && + next_aux_op == ANV_COLOR_AUX_OP_CLASS_FAST_CLEAR && cmd_buffer->device->isl_dev.ss.clear_color_state_size > 0) { /* From the ICL PRM Vol. 9, "State Caching": * @@ -3718,9 +3707,8 @@ genX(cmd_buffer_update_color_aux_op)(struct anv_cmd_buffer *cmd_buffer, } /* Update the auxiliary surface operation, but with one exception. */ - if (last_aux_op == ISL_AUX_OP_FAST_CLEAR && - next_aux_op == ISL_AUX_OP_AMBIGUATE) { - assert(aux_op_clears(last_aux_op) && aux_op_clears(next_aux_op)); + if (last_aux_op == ANV_COLOR_AUX_OP_CLASS_HW_AMBIGUATE && + next_aux_op == ANV_COLOR_AUX_OP_CLASS_FAST_CLEAR) { /* Fast clears and ambiguates are in the same class of operation, but * fast clears have more stringent synchronization requirements. For * better performance, don't replace the current fast clear operation @@ -4045,7 +4033,7 @@ end_command_buffer(struct anv_cmd_buffer *cmd_buffer) } /* Flush any in-progress CCS/MCS operations in preparation for chaining. */ - genX(cmd_buffer_update_color_aux_op(cmd_buffer, ISL_AUX_OP_NONE)); + genX(cmd_buffer_update_color_aux_op)(cmd_buffer, ANV_COLOR_AUX_OP_CLASS_NONE); genX(cmd_buffer_flush_generated_draws)(cmd_buffer); @@ -4158,7 +4146,7 @@ genX(CmdExecuteCommands)( /* Ensure we're in a regular drawing cache mode (assumption for all * secondary). 
*/ - genX(cmd_buffer_update_color_aux_op(container, ISL_AUX_OP_NONE)); + genX(cmd_buffer_update_color_aux_op)(container, ANV_COLOR_AUX_OP_CLASS_NONE); /* The secondary command buffer doesn't know which textures etc. have been * flushed prior to their execution. Apply those flushes now. diff --git a/src/intel/vulkan/genX_cmd_compute.c b/src/intel/vulkan/genX_cmd_compute.c index c65131a5610..9808cd7b260 100644 --- a/src/intel/vulkan/genX_cmd_compute.c +++ b/src/intel/vulkan/genX_cmd_compute.c @@ -100,7 +100,7 @@ cmd_buffer_flush_compute_state(struct anv_cmd_buffer *cmd_buffer) comp_state->shader->prog_data->total_shared > 0 ? device->l3_slm_config : device->l3_config); - genX(cmd_buffer_update_color_aux_op(cmd_buffer, ISL_AUX_OP_NONE)); + genX(cmd_buffer_update_color_aux_op)(cmd_buffer, ANV_COLOR_AUX_OP_CLASS_NONE); genX(flush_descriptor_buffers)(cmd_buffer, &comp_state->base, VK_SHADER_STAGE_COMPUTE_BIT); @@ -1162,7 +1162,7 @@ cmd_buffer_trace_rays(struct anv_cmd_buffer *cmd_buffer, genX(cmd_buffer_config_l3)(cmd_buffer, device->l3_config); - genX(cmd_buffer_update_color_aux_op(cmd_buffer, ISL_AUX_OP_NONE)); + genX(cmd_buffer_update_color_aux_op)(cmd_buffer, ANV_COLOR_AUX_OP_CLASS_NONE); genX(flush_descriptor_buffers)(cmd_buffer, &rt->base, ANV_RT_STAGE_BITS); diff --git a/src/intel/vulkan/genX_cmd_draw.c b/src/intel/vulkan/genX_cmd_draw.c index cd8c3c08e6d..7e46d11813e 100644 --- a/src/intel/vulkan/genX_cmd_draw.c +++ b/src/intel/vulkan/genX_cmd_draw.c @@ -801,7 +801,7 @@ cmd_buffer_flush_gfx_state(struct anv_cmd_buffer *cmd_buffer) genX(cmd_buffer_config_l3)(cmd_buffer, device->l3_config); - genX(cmd_buffer_update_color_aux_op(cmd_buffer, ISL_AUX_OP_NONE)); + genX(cmd_buffer_update_color_aux_op)(cmd_buffer, ANV_COLOR_AUX_OP_CLASS_NONE); genX(cmd_buffer_emit_hashing_mode)(cmd_buffer, UINT_MAX, UINT_MAX, 1);