anv/blorp/iris: rework Wa_14025112257

Drivers already have to track this workaround, so remove the logic
from Blorp and let the driver manage this.

Also, in Anv, don't accumulate this workaround; emit it directly in
place, right after COMPUTE_WALKER. Accumulating can be problematic when
you want to dispatch concurrent compute shaders that do not need any
cache flush interaction (a typical example is the internal
simple_shader framework).

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Fixes: 3e0ad0176b ("anv: Emit state cache invalidation after every compute dispatch")
Reviewed-by: Alyssa Rosenzweig <alyssa.rosenzweig@intel.com>
Reviewed-by: Sagar Ghuge <sagar.ghuge@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/38306>
This commit is contained in:
Lionel Landwerlin 2025-11-07 14:15:14 +02:00 committed by Marge Bot
parent 0c31313b6e
commit c478b6355a
7 changed files with 46 additions and 38 deletions

View file

@ -464,6 +464,15 @@ iris_blorp_exec_blitter(struct blorp_batch *blorp_batch,
iris_bo_bump_seqno(params->dst.addr.buffer, batch->next_seqno,
IRIS_DOMAIN_OTHER_WRITE);
/*
* TODO: Add INTEL_NEEDS_WA_14025112257 check once HSD is propagated for all
* other impacted platforms.
*/
if (batch->screen->devinfo->ver >= 20 && batch->name == IRIS_BATCH_COMPUTE) {
iris_emit_pipe_control_flush(batch, "WA_14025112257",
PIPE_CONTROL_STATE_CACHE_INVALIDATE);
}
}
static void

View file

@ -9951,6 +9951,16 @@ iris_emit_raw_pipe_control(struct iris_batch *batch,
}
#endif
#if GFX_VER >= 12
/* BSpec 47112 (xe), 56551 (xe2): Instruction_PIPE_CONTROL (ComputeCS):
* SW must follow below programming restrictions when programming
* PIPE_CONTROL command:
* "Command Streamer Stall Enable" must be always set.
*/
if (batch->name == IRIS_BATCH_COMPUTE)
flags |= PIPE_CONTROL_CS_STALL;
#endif
/* The "L3 Read Only Cache Invalidation Bit" docs say it "controls the
* invalidation of the Geometry streams cached in L3 cache at the top
* of the pipe". In other words, index & vertex data that gets cached

View file

@ -1831,25 +1831,6 @@ blorp_exec_compute(struct blorp_batch *batch, const struct blorp_params *params)
blorp_emit(batch, GENX(COMPUTE_WALKER), cw) {
cw.body = body;
}
/*
* TODO: Add INTEL_NEEDS_WA_14025112257 check once HSD is propagated for all
* other impacted platforms.
*
* BSpec 47112 (xe), 56551 (xe2): Instruction_PIPE_CONTROL (ComputeCS):
* SW must follow below programming restrictions when programming
* PIPE_CONTROL command:
*
* "Command Streamer Stall Enable" must be always set.
* ...
*/
if (devinfo->ver >= 20) {
blorp_emit(batch, GENX(PIPE_CONTROL), pc) {
pc.CommandStreamerStallEnable =
batch->flags & BLORP_BATCH_COMPUTE_ENGINE;
pc.StateCacheInvalidationEnable = true;
}
}
#else
/* The MEDIA_VFE_STATE documentation for Gfx8+ says:

View file

@ -174,19 +174,21 @@ genX(cmd_buffer_set_coarse_pixel_active)(struct anv_cmd_buffer *cmd_buffer,
#endif
}
/*
* TODO: Add INTEL_NEEDS_WA_14025112257 check once HSD is propagated for all
* other impacted platforms.
*/
static inline void
genX(cmd_buffer_state_cache_inval_wa_14025112257)(
struct anv_cmd_buffer *cmd_buffer)
genX(cmd_buffer_post_dispatch_wa)(struct anv_cmd_buffer *cmd_buffer)
{
/* TODO: Add INTEL_NEEDS_WA_14025112257 check once HSD is propagated for all
* other impacted platforms.
*/
if (cmd_buffer->device->info->ver >= 20 &&
anv_cmd_buffer_is_compute_queue(cmd_buffer)) {
anv_add_pending_pipe_bits(cmd_buffer,
ANV_PIPE_STATE_CACHE_INVALIDATE_BIT,
"WA_14025112257");
enum anv_pipe_bits emitted_bits = 0;
genX(emit_apply_pipe_flushes)(&cmd_buffer->batch,
cmd_buffer->device,
cmd_buffer->state.current_pipeline,
ANV_PIPE_STATE_CACHE_INVALIDATE_BIT,
&emitted_bits);
cmd_buffer->state.pending_pipe_bits &= ~emitted_bits;
}
}

View file

@ -458,6 +458,8 @@ blorp_exec_on_compute(struct blorp_batch *batch,
cmd_buffer->state.descriptors_dirty |= VK_SHADER_STAGE_COMPUTE_BIT;
cmd_buffer->state.push_constants_dirty |= VK_SHADER_STAGE_COMPUTE_BIT;
cmd_buffer->state.compute.pipeline_dirty = true;
genX(cmd_buffer_post_dispatch_wa)(cmd_buffer);
}
static void

View file

@ -477,7 +477,7 @@ emit_indirect_compute_walker(struct anv_cmd_buffer *cmd_buffer,
indirect_addr.bo, 0),
);
genX(cmd_buffer_state_cache_inval_wa_14025112257)(cmd_buffer);
genX(cmd_buffer_post_dispatch_wa)(cmd_buffer);
}
static inline void
@ -545,7 +545,7 @@ emit_compute_walker(struct anv_cmd_buffer *cmd_buffer,
#endif
);
genX(cmd_buffer_state_cache_inval_wa_14025112257)(cmd_buffer);
genX(cmd_buffer_post_dispatch_wa)(cmd_buffer);
}
#else /* #if GFX_VERx10 >= 125 */
@ -1325,7 +1325,7 @@ cmd_buffer_trace_rays(struct anv_cmd_buffer *cmd_buffer,
.body = body,
);
genX(cmd_buffer_state_cache_inval_wa_14025112257)(cmd_buffer);
genX(cmd_buffer_post_dispatch_wa)(cmd_buffer);
trace_intel_end_rays(&cmd_buffer->trace,
params->launch_size[0],

View file

@ -667,13 +667,17 @@ genX(emit_simple_shader_dispatch)(struct anv_simple_shader *state,
cw.body = body;
}
/* TODO: switch to use INTEL_NEEDS_WA_14025112257 */
if (device->info->ver >= 20 &&
batch->engine_class == INTEL_ENGINE_CLASS_COMPUTE) {
enum anv_pipe_bits emitted_bits = 0;
genX(emit_apply_pipe_flushes)(batch, device, GPGPU,
ANV_PIPE_STATE_CACHE_INVALIDATE_BIT,
&emitted_bits);
if (state->cmd_buffer) {
genX(cmd_buffer_post_dispatch_wa)(state->cmd_buffer);
} else {
/* TODO: switch to use INTEL_NEEDS_WA_14025112257 */
if (device->info->ver >= 20 &&
batch->engine_class == INTEL_ENGINE_CLASS_COMPUTE) {
enum anv_pipe_bits emitted_bits = 0;
genX(emit_apply_pipe_flushes)(batch, device, GPGPU,
ANV_PIPE_STATE_CACHE_INVALIDATE_BIT,
&emitted_bits);
}
}
#else /* GFX_VERx10 < 125 */