anv: rework color_aux operation tracking

The current tracking seems to have issues related to MCS ambiguate that
are currently hidden by the fact that we're inserting pb-stall+RT-flush
on BTI changes, which we're going to remove in the next commits.

The issues appear to be related to a missing pb-stall+RT-flush between
MCS ambiguate and fast-clear, causing failures on the following tests
once BTP+BTI RCC caching is enabled:

  dEQP-VK.pipeline.*.multisample.misc.*multi*
  dEQP-VK.pipeline.*.framebuffer_attachment.diff_attachments_2d_32x32_39x41_ms
  dEQP-VK.pipeline.*.framebuffer_attachment.diff_attachments_2d_32x32_48x48_ms

Here we rework the tracking with a new enum to track 3 classes of
operations.

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Reviewed-by: Nanley Chery <nanley.g.chery@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/39982>
This commit is contained in:
Lionel Landwerlin 2026-01-05 15:38:56 +02:00 committed by Marge Bot
parent ab10ee1dd4
commit adf18761f8
6 changed files with 68 additions and 61 deletions

View file

@ -95,7 +95,7 @@ void genX(batch_emit_push_constants)(struct anv_batch *batch,
void
genX(cmd_buffer_update_color_aux_op)(struct anv_cmd_buffer *cmd_buffer,
enum isl_aux_op aux_op);
enum anv_color_aux_op_class aux_op);
void genX(cmd_buffer_emit_gfx12_depth_wa)(struct anv_cmd_buffer *cmd_buffer,
const struct isl_surf *surf);

View file

@ -4741,6 +4741,21 @@ enum anv_cmd_descriptor_buffer_mode {
ANV_CMD_DESCRIPTOR_BUFFER_MODE_BUFFER,
};
/**
 * Classes of color auxiliary surface operations, as stored in the
 * color_aux_op field of the command buffer state and passed to
 * genX(cmd_buffer_update_color_aux_op).
 */
enum anv_color_aux_op_class {
   /** Rendering, or any operation class not related to color */
   ANV_COLOR_AUX_OP_CLASS_NONE = 0,

   /** Ambiguate managed by software (MCS & CCS-pre-gfx11) */
   ANV_COLOR_AUX_OP_CLASS_SW_AMBIGUATE,

   /** Ambiguate managed by hardware (CCS gfx11+) */
   ANV_COLOR_AUX_OP_CLASS_HW_AMBIGUATE,

   /** Fast clear (includes CCS ambiguate) */
   ANV_COLOR_AUX_OP_CLASS_FAST_CLEAR,

   /** Resolve managed by hardware */
   ANV_COLOR_AUX_OP_CLASS_HW_RESOLVE,

   /** Resolve managed by software */
   ANV_COLOR_AUX_OP_CLASS_SW_RESOLVE,
};
/** State required while building cmd buffer */
struct anv_cmd_state {
/* PIPELINE_SELECT.PipelineSelection */
@ -4836,7 +4851,7 @@ struct anv_cmd_state {
/* The last auxiliary surface operation (or equivalent operation) provided
* to genX(cmd_buffer_update_color_aux_op).
*/
enum isl_aux_op color_aux_op;
enum anv_color_aux_op_class color_aux_op;
/**
* Whether RHWO optimization is enabled (Wa_1508744258 and Wa_14024015672).

View file

@ -491,32 +491,36 @@ blorp_exec_on_blitter(struct blorp_batch *batch,
blorp_exec(batch, params);
}
static enum isl_aux_op
static enum anv_color_aux_op_class
get_color_aux_op(const struct blorp_params *params)
{
switch (params->op) {
case BLORP_OP_CCS_RESOLVE:
case BLORP_OP_CCS_PARTIAL_RESOLVE:
case BLORP_OP_CCS_COLOR_CLEAR:
case BLORP_OP_MCS_COLOR_CLEAR:
assert(params->fast_clear_op != ISL_AUX_OP_NONE);
return params->fast_clear_op;
return ANV_COLOR_AUX_OP_CLASS_FAST_CLEAR;
/* Some auxiliary surface operations are not provided by hardware. To
* provide that functionality, BLORP sometimes tries to emulate what
* hardware would do with custom pixel shaders. For now, we assume that
* BLORP's implementation has the same cache invalidation and flushing
* requirements as similar hardware operations.
* hardware would do with custom pixel shaders.
*/
case BLORP_OP_CCS_AMBIGUATE:
assert(GFX_VER >= 11 || params->fast_clear_op == ISL_AUX_OP_NONE);
return ISL_AUX_OP_AMBIGUATE;
if (params->fast_clear_op == ISL_AUX_OP_NONE) {
return ANV_COLOR_AUX_OP_CLASS_SW_AMBIGUATE;
} else {
assert(GFX_VER >= 11);
return ANV_COLOR_AUX_OP_CLASS_HW_AMBIGUATE;
}
case BLORP_OP_MCS_AMBIGUATE:
assert(params->fast_clear_op == ISL_AUX_OP_NONE);
return ISL_AUX_OP_AMBIGUATE;
return ANV_COLOR_AUX_OP_CLASS_SW_AMBIGUATE;
case BLORP_OP_CCS_RESOLVE:
case BLORP_OP_CCS_PARTIAL_RESOLVE:
assert(params->fast_clear_op != ISL_AUX_OP_NONE);
return ANV_COLOR_AUX_OP_CLASS_HW_RESOLVE;
case BLORP_OP_MCS_PARTIAL_RESOLVE:
assert(params->fast_clear_op == ISL_AUX_OP_NONE);
return ISL_AUX_OP_PARTIAL_RESOLVE;
return ANV_COLOR_AUX_OP_CLASS_SW_RESOLVE;
/* If memory aliasing is being done on an image, a pending fast clear
* could hit the destination address at an unknown time. Go back to the
@ -532,7 +536,7 @@ get_color_aux_op(const struct blorp_params *params)
case BLORP_OP_SLOW_DEPTH_STENCIL_CLEAR:
case BLORP_OP_SLOW_DEPTH_CLEAR:
assert(params->fast_clear_op == ISL_AUX_OP_NONE);
return ISL_AUX_OP_NONE;
return ANV_COLOR_AUX_OP_CLASS_NONE;
/* The remaining operations are considered regular draws. */
case BLORP_OP_LINEAR_SURFACE_CLEAR:
@ -540,7 +544,7 @@ get_color_aux_op(const struct blorp_params *params)
case BLORP_OP_BLIT:
case BLORP_OP_COPY:
assert(params->fast_clear_op == ISL_AUX_OP_NONE);
return ISL_AUX_OP_NONE;
return ANV_COLOR_AUX_OP_CLASS_NONE;
}
UNREACHABLE("Invalid value in params->op");
@ -563,7 +567,7 @@ genX(blorp_exec)(struct blorp_batch *batch,
}
/* Flush any in-progress CCS/MCS operations as needed. */
const enum isl_aux_op aux_op = get_color_aux_op(params);
const enum anv_color_aux_op_class aux_op = get_color_aux_op(params);
genX(cmd_buffer_update_color_aux_op(cmd_buffer, aux_op));
if (batch->flags & BLORP_BATCH_USE_BLITTER)

View file

@ -3464,50 +3464,38 @@ genX(cmd_buffer_begin_companion)(struct anv_cmd_buffer *cmd_buffer,
}
static bool
aux_op_resolves(enum isl_aux_op aux_op)
is_hw_managed_fast_clear(enum anv_color_aux_op_class op)
{
return aux_op == ISL_AUX_OP_FULL_RESOLVE ||
aux_op == ISL_AUX_OP_PARTIAL_RESOLVE;
}
static bool
aux_op_clears(enum isl_aux_op aux_op)
{
return aux_op == ISL_AUX_OP_FAST_CLEAR ||
aux_op == ISL_AUX_OP_AMBIGUATE;
}
static bool
aux_op_renders(enum isl_aux_op aux_op)
{
return aux_op == ISL_AUX_OP_NONE;
return op == ANV_COLOR_AUX_OP_CLASS_FAST_CLEAR ||
op == ANV_COLOR_AUX_OP_CLASS_HW_AMBIGUATE;
}
static void
add_pending_pipe_bits_for_color_aux_op(struct anv_cmd_buffer *cmd_buffer,
enum isl_aux_op next_aux_op,
enum anv_color_aux_op_class next_aux_op,
enum anv_pipe_bits pipe_bits,
const char *reason)
{
const enum isl_aux_op last_aux_op = cmd_buffer->state.color_aux_op;
const enum anv_color_aux_op_class last_aux_op = cmd_buffer->state.color_aux_op;
assert(next_aux_op != last_aux_op);
anv_add_pending_pipe_bits(cmd_buffer,
aux_op_clears(next_aux_op) ?
is_hw_managed_fast_clear(next_aux_op) ?
VK_PIPELINE_STAGE_2_NONE :
VK_PIPELINE_STAGE_2_COLOR_ATTACHMENT_OUTPUT_BIT,
aux_op_clears(next_aux_op) ?
is_hw_managed_fast_clear(next_aux_op) ?
VK_PIPELINE_STAGE_2_FRAGMENT_SHADER_BIT : 0,
pipe_bits, reason);
}
void
genX(cmd_buffer_update_color_aux_op)(struct anv_cmd_buffer *cmd_buffer,
enum isl_aux_op next_aux_op)
enum anv_color_aux_op_class next_aux_op)
{
const enum isl_aux_op last_aux_op = cmd_buffer->state.color_aux_op;
const enum anv_color_aux_op_class last_aux_op = cmd_buffer->state.color_aux_op;
if (!aux_op_clears(last_aux_op) && aux_op_clears(next_aux_op)) {
if (!is_hw_managed_fast_clear(last_aux_op) &&
is_hw_managed_fast_clear(next_aux_op)) {
#if GFX_VER >= 20
/* From the Xe2 Bspec 57340 (r59562),
* "MCS/CCS Buffers, Fast Clear for Render Target(s)":
@ -3529,7 +3517,7 @@ genX(cmd_buffer_update_color_aux_op)(struct anv_cmd_buffer *cmd_buffer,
*/
add_pending_pipe_bits_for_color_aux_op(
cmd_buffer, next_aux_op, ANV_PIPE_RT_BTI_CHANGE,
"aux color !aux->aux");
"aux color !fast-clear->fast-clear");
#elif GFX_VERx10 == 125
/* From the ACM Bspec 47704 (r52663), "Render Target Fast Clear":
@ -3556,7 +3544,7 @@ genX(cmd_buffer_update_color_aux_op)(struct anv_cmd_buffer *cmd_buffer,
ANV_PIPE_HDC_PIPELINE_FLUSH_BIT |
ANV_PIPE_DATA_CACHE_FLUSH_BIT |
ANV_PIPE_TEXTURE_CACHE_INVALIDATE_BIT,
"aux color !aux->aux");
"aux color !fast-clear->fast-clear");
#elif GFX_VERx10 == 120
/* From the TGL Bspec 47704 (r52663), "Render Target Fast Clear":
@ -3580,7 +3568,7 @@ genX(cmd_buffer_update_color_aux_op)(struct anv_cmd_buffer *cmd_buffer,
ANV_PIPE_TILE_CACHE_FLUSH_BIT |
ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT |
ANV_PIPE_TEXTURE_CACHE_INVALIDATE_BIT,
"aux color !aux->aux");
"aux color !fast-clear->fast-clear");
#else
/* From the Sky Lake PRM Vol. 7, "MCS Buffer for Render Target(s)":
@ -3609,9 +3597,10 @@ genX(cmd_buffer_update_color_aux_op)(struct anv_cmd_buffer *cmd_buffer,
cmd_buffer, next_aux_op,
ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT |
ANV_PIPE_END_OF_PIPE_SYNC_BIT,
"aux color !aux->aux");
"aux color !fast-clear->fast-clear");
#endif
} else if (aux_op_clears(last_aux_op) && !aux_op_clears(next_aux_op)) {
} else if (is_hw_managed_fast_clear(last_aux_op) &&
!is_hw_managed_fast_clear(next_aux_op)) {
#if GFX_VERx10 >= 125
/* From the ACM PRM Vol. 9, "Color Fast Clear Synchronization":
*
@ -3623,7 +3612,7 @@ genX(cmd_buffer_update_color_aux_op)(struct anv_cmd_buffer *cmd_buffer,
*/
add_pending_pipe_bits_for_color_aux_op(
cmd_buffer, next_aux_op, ANV_PIPE_RT_BTI_CHANGE,
"aux color aux->!aux");
"aux color fast-clear->!fast-clear");
#elif GFX_VERx10 == 120
/* From the TGL PRM Vol. 9, "Color Fast Clear Synchronization":
@ -3648,7 +3637,7 @@ genX(cmd_buffer_update_color_aux_op)(struct anv_cmd_buffer *cmd_buffer,
ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT |
ANV_PIPE_L3_FABRIC_FLUSH_BIT |
ANV_PIPE_DEPTH_STALL_BIT,
"aux color aux->!aux");
"aux color fast-clear->!fast-clear");
#else
/* From the Sky Lake PRM Vol. 7, "Render Target Fast Clear":
@ -3667,11 +3656,11 @@ genX(cmd_buffer_update_color_aux_op)(struct anv_cmd_buffer *cmd_buffer,
cmd_buffer, next_aux_op,
ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT |
ANV_PIPE_END_OF_PIPE_SYNC_BIT,
"aux color aux->!aux");
"aux color fast-clear->!fast-clear");
#endif
} else if (aux_op_renders(last_aux_op) != aux_op_renders(next_aux_op)) {
assert(aux_op_resolves(last_aux_op) != aux_op_resolves(next_aux_op));
} else if (last_aux_op != next_aux_op &&
!is_hw_managed_fast_clear(last_aux_op) &&
!is_hw_managed_fast_clear(next_aux_op)) {
/* From the Sky Lake PRM Vol. 7, "MCS Buffer for Render Target(s)":
*
* Any transition from any value in {Clear, Render, Resolve} to a
@ -3691,11 +3680,11 @@ genX(cmd_buffer_update_color_aux_op)(struct anv_cmd_buffer *cmd_buffer,
cmd_buffer, next_aux_op,
ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT |
ANV_PIPE_END_OF_PIPE_SYNC_BIT,
"aux color render->!render");
"aux color change (non fast-clear)");
}
if (last_aux_op != ISL_AUX_OP_FAST_CLEAR &&
next_aux_op == ISL_AUX_OP_FAST_CLEAR &&
if (last_aux_op != ANV_COLOR_AUX_OP_CLASS_FAST_CLEAR &&
next_aux_op == ANV_COLOR_AUX_OP_CLASS_FAST_CLEAR &&
cmd_buffer->device->isl_dev.ss.clear_color_state_size > 0) {
/* From the ICL PRM Vol. 9, "State Caching":
*
@ -3718,9 +3707,8 @@ genX(cmd_buffer_update_color_aux_op)(struct anv_cmd_buffer *cmd_buffer,
}
/* Update the auxiliary surface operation, but with one exception. */
if (last_aux_op == ISL_AUX_OP_FAST_CLEAR &&
next_aux_op == ISL_AUX_OP_AMBIGUATE) {
assert(aux_op_clears(last_aux_op) && aux_op_clears(next_aux_op));
if (last_aux_op == ANV_COLOR_AUX_OP_CLASS_HW_AMBIGUATE &&
next_aux_op == ANV_COLOR_AUX_OP_CLASS_FAST_CLEAR) {
/* Fast clears and ambiguates are in the same class of operation, but
* fast clears have more stringent synchronization requirements. For
* better performance, don't replace the current fast clear operation
@ -4045,7 +4033,7 @@ end_command_buffer(struct anv_cmd_buffer *cmd_buffer)
}
/* Flush any in-progress CCS/MCS operations in preparation for chaining. */
genX(cmd_buffer_update_color_aux_op(cmd_buffer, ISL_AUX_OP_NONE));
genX(cmd_buffer_update_color_aux_op)(cmd_buffer, ANV_COLOR_AUX_OP_CLASS_NONE);
genX(cmd_buffer_flush_generated_draws)(cmd_buffer);
@ -4158,7 +4146,7 @@ genX(CmdExecuteCommands)(
/* Ensure we're in a regular drawing cache mode (assumption for all
* secondary).
*/
genX(cmd_buffer_update_color_aux_op(container, ISL_AUX_OP_NONE));
genX(cmd_buffer_update_color_aux_op)(container, ANV_COLOR_AUX_OP_CLASS_NONE);
/* The secondary command buffer doesn't know which textures etc. have been
* flushed prior to their execution. Apply those flushes now.

View file

@ -100,7 +100,7 @@ cmd_buffer_flush_compute_state(struct anv_cmd_buffer *cmd_buffer)
comp_state->shader->prog_data->total_shared > 0 ?
device->l3_slm_config : device->l3_config);
genX(cmd_buffer_update_color_aux_op(cmd_buffer, ISL_AUX_OP_NONE));
genX(cmd_buffer_update_color_aux_op)(cmd_buffer, ANV_COLOR_AUX_OP_CLASS_NONE);
genX(flush_descriptor_buffers)(cmd_buffer, &comp_state->base,
VK_SHADER_STAGE_COMPUTE_BIT);
@ -1162,7 +1162,7 @@ cmd_buffer_trace_rays(struct anv_cmd_buffer *cmd_buffer,
genX(cmd_buffer_config_l3)(cmd_buffer, device->l3_config);
genX(cmd_buffer_update_color_aux_op(cmd_buffer, ISL_AUX_OP_NONE));
genX(cmd_buffer_update_color_aux_op)(cmd_buffer, ANV_COLOR_AUX_OP_CLASS_NONE);
genX(flush_descriptor_buffers)(cmd_buffer, &rt->base,
ANV_RT_STAGE_BITS);

View file

@ -801,7 +801,7 @@ cmd_buffer_flush_gfx_state(struct anv_cmd_buffer *cmd_buffer)
genX(cmd_buffer_config_l3)(cmd_buffer, device->l3_config);
genX(cmd_buffer_update_color_aux_op(cmd_buffer, ISL_AUX_OP_NONE));
genX(cmd_buffer_update_color_aux_op)(cmd_buffer, ANV_COLOR_AUX_OP_CLASS_NONE);
genX(cmd_buffer_emit_hashing_mode)(cmd_buffer, UINT_MAX, UINT_MAX, 1);