From c478b6355afd653db8a211aa533fffd628cb780d Mon Sep 17 00:00:00 2001 From: Lionel Landwerlin Date: Fri, 7 Nov 2025 14:15:14 +0200 Subject: [PATCH] anv/blorp/iris: rework Wa_14025112257 Drivers already have to track this workaround, so remove the logic from Blorp and let the driver manage this. Also in Anv don't accumulate this workaround, emit it directly in place right after COMPUTE_WALKER. Accumulating can be problematic when you want to dispatch concurrent compute shaders that do not need any cache flush interaction (typical example with the internal simple_shader framework). Signed-off-by: Lionel Landwerlin Fixes: 3e0ad0176b ("anv: Emit state cache invalidation after every compute dispatch") Reviewed-by: Alyssa Rosenzweig Reviewed-by: Sagar Ghuge Part-of: --- src/gallium/drivers/iris/iris_blorp.c | 9 +++++++++ src/gallium/drivers/iris/iris_state.c | 10 ++++++++++ src/intel/blorp/blorp_genX_exec_brw.h | 19 ------------------- src/intel/vulkan/anv_genX.h | 20 +++++++++++--------- src/intel/vulkan/genX_blorp_exec.c | 2 ++ src/intel/vulkan/genX_cmd_compute.c | 6 +++--- src/intel/vulkan/genX_simple_shader.c | 18 +++++++++++------- 7 files changed, 46 insertions(+), 38 deletions(-) diff --git a/src/gallium/drivers/iris/iris_blorp.c b/src/gallium/drivers/iris/iris_blorp.c index 443076b234b..55cc78aad98 100644 --- a/src/gallium/drivers/iris/iris_blorp.c +++ b/src/gallium/drivers/iris/iris_blorp.c @@ -464,6 +464,15 @@ iris_blorp_exec_blitter(struct blorp_batch *blorp_batch, iris_bo_bump_seqno(params->dst.addr.buffer, batch->next_seqno, IRIS_DOMAIN_OTHER_WRITE); + + /* + * TODO: Add INTEL_NEEDS_WA_14025112257 check once HSD is propagated for all + * other impacted platforms. 
+ */ + if (batch->screen->devinfo->ver >= 20 && batch->name == IRIS_BATCH_COMPUTE) { + iris_emit_pipe_control_flush(batch, "WA_14025112257", + PIPE_CONTROL_STATE_CACHE_INVALIDATE); + } } static void diff --git a/src/gallium/drivers/iris/iris_state.c b/src/gallium/drivers/iris/iris_state.c index e1332d9ac48..eef7df7a609 100644 --- a/src/gallium/drivers/iris/iris_state.c +++ b/src/gallium/drivers/iris/iris_state.c @@ -9951,6 +9951,16 @@ iris_emit_raw_pipe_control(struct iris_batch *batch, } #endif +#if GFX_VER >= 12 + /* BSpec 47112 (xe), 56551 (xe2): Instruction_PIPE_CONTROL (ComputeCS): + * SW must follow below programming restrictions when programming + * PIPE_CONTROL command: + * "Command Streamer Stall Enable" must be always set. + */ + if (batch->name == IRIS_BATCH_COMPUTE) + flags |= PIPE_CONTROL_CS_STALL; +#endif + /* The "L3 Read Only Cache Invalidation Bit" docs say it "controls the * invalidation of the Geometry streams cached in L3 cache at the top * of the pipe". In other words, index & vertex data that gets cached diff --git a/src/intel/blorp/blorp_genX_exec_brw.h b/src/intel/blorp/blorp_genX_exec_brw.h index 76617fd02a6..aa959f9cca7 100644 --- a/src/intel/blorp/blorp_genX_exec_brw.h +++ b/src/intel/blorp/blorp_genX_exec_brw.h @@ -1831,25 +1831,6 @@ blorp_exec_compute(struct blorp_batch *batch, const struct blorp_params *params) blorp_emit(batch, GENX(COMPUTE_WALKER), cw) { cw.body = body; } - - /* - * TDOD: Add INTEL_NEEDS_WA_14025112257 check once HSD is propogated for all - * other impacted platforms. - * - * BSpec 47112 (xe), 56551 (xe2): Instruction_PIPE_CONTROL (ComputeCS): - * SW must follow below programming restrictions when programming - * PIPE_CONTROL command: - * - * "Command Streamer Stall Enable" must be always set. - * ... 
- */ - if (devinfo->ver >= 20) { - blorp_emit(batch, GENX(PIPE_CONTROL), pc) { - pc.CommandStreamerStallEnable = - batch->flags & BLORP_BATCH_COMPUTE_ENGINE; - pc.StateCacheInvalidationEnable = true; - } - } #else /* The MEDIA_VFE_STATE documentation for Gfx8+ says: diff --git a/src/intel/vulkan/anv_genX.h b/src/intel/vulkan/anv_genX.h index d47e1473912..314d059e93d 100644 --- a/src/intel/vulkan/anv_genX.h +++ b/src/intel/vulkan/anv_genX.h @@ -174,19 +174,21 @@ genX(cmd_buffer_set_coarse_pixel_active)(struct anv_cmd_buffer *cmd_buffer, #endif } -/* - * TDOD: Add INTEL_NEEDS_WA_14025112257 check once HSD is propogated for all - * other impacted platforms. - */ static inline void -genX(cmd_buffer_state_cache_inval_wa_14025112257)( - struct anv_cmd_buffer *cmd_buffer) +genX(cmd_buffer_post_dispatch_wa)(struct anv_cmd_buffer *cmd_buffer) { + /* TODO: Add INTEL_NEEDS_WA_14025112257 check once HSD is propagated for all + * other impacted platforms. + */ if (cmd_buffer->device->info->ver >= 20 && anv_cmd_buffer_is_compute_queue(cmd_buffer)) { - anv_add_pending_pipe_bits(cmd_buffer, - ANV_PIPE_STATE_CACHE_INVALIDATE_BIT, - "WA_14025112257"); + enum anv_pipe_bits emitted_bits = 0; + genX(emit_apply_pipe_flushes)(&cmd_buffer->batch, + cmd_buffer->device, + cmd_buffer->state.current_pipeline, + ANV_PIPE_STATE_CACHE_INVALIDATE_BIT, + &emitted_bits); + cmd_buffer->state.pending_pipe_bits &= ~emitted_bits; } } diff --git a/src/intel/vulkan/genX_blorp_exec.c b/src/intel/vulkan/genX_blorp_exec.c index 4a75f77b621..a1fa31557c5 100644 --- a/src/intel/vulkan/genX_blorp_exec.c +++ b/src/intel/vulkan/genX_blorp_exec.c @@ -458,6 +458,8 @@ blorp_exec_on_compute(struct blorp_batch *batch, cmd_buffer->state.descriptors_dirty |= VK_SHADER_STAGE_COMPUTE_BIT; cmd_buffer->state.push_constants_dirty |= VK_SHADER_STAGE_COMPUTE_BIT; cmd_buffer->state.compute.pipeline_dirty = true; + + genX(cmd_buffer_post_dispatch_wa)(cmd_buffer); } static void diff --git a/src/intel/vulkan/genX_cmd_compute.c 
b/src/intel/vulkan/genX_cmd_compute.c index 192396d8ecf..cbe677fc668 100644 --- a/src/intel/vulkan/genX_cmd_compute.c +++ b/src/intel/vulkan/genX_cmd_compute.c @@ -477,7 +477,7 @@ emit_indirect_compute_walker(struct anv_cmd_buffer *cmd_buffer, indirect_addr.bo, 0), ); - genX(cmd_buffer_state_cache_inval_wa_14025112257)(cmd_buffer); + genX(cmd_buffer_post_dispatch_wa)(cmd_buffer); } static inline void @@ -545,7 +545,7 @@ emit_compute_walker(struct anv_cmd_buffer *cmd_buffer, #endif ); - genX(cmd_buffer_state_cache_inval_wa_14025112257)(cmd_buffer); + genX(cmd_buffer_post_dispatch_wa)(cmd_buffer); } #else /* #if GFX_VERx10 >= 125 */ @@ -1325,7 +1325,7 @@ cmd_buffer_trace_rays(struct anv_cmd_buffer *cmd_buffer, .body = body, ); - genX(cmd_buffer_state_cache_inval_wa_14025112257)(cmd_buffer); + genX(cmd_buffer_post_dispatch_wa)(cmd_buffer); trace_intel_end_rays(&cmd_buffer->trace, params->launch_size[0], diff --git a/src/intel/vulkan/genX_simple_shader.c b/src/intel/vulkan/genX_simple_shader.c index 7f4bbcd1df5..5ac0160121f 100644 --- a/src/intel/vulkan/genX_simple_shader.c +++ b/src/intel/vulkan/genX_simple_shader.c @@ -667,13 +667,17 @@ genX(emit_simple_shader_dispatch)(struct anv_simple_shader *state, cw.body = body; } - /* TODO: switch to use INTEL_NEEDS_WA_14025112257 */ - if (device->info->ver >= 20 && - batch->engine_class == INTEL_ENGINE_CLASS_COMPUTE) { - enum anv_pipe_bits emitted_bits = 0; - genX(emit_apply_pipe_flushes)(batch, device, GPGPU, - ANV_PIPE_STATE_CACHE_INVALIDATE_BIT, - &emitted_bits); + if (state->cmd_buffer) { + genX(cmd_buffer_post_dispatch_wa)(state->cmd_buffer); + } else { + /* TODO: switch to use INTEL_NEEDS_WA_14025112257 */ + if (device->info->ver >= 20 && + batch->engine_class == INTEL_ENGINE_CLASS_COMPUTE) { + enum anv_pipe_bits emitted_bits = 0; + genX(emit_apply_pipe_flushes)(batch, device, GPGPU, + ANV_PIPE_STATE_CACHE_INVALIDATE_BIT, + &emitted_bits); + } } #else /* GFX_VERx10 < 125 */