mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-24 19:40:10 +01:00
anv/blorp/iris: rework Wa_14025112257
Drivers already have to track this workaround, so remove the logic
from Blorp and let the driver manage this.
Also, in Anv, don't accumulate this workaround; emit it directly in
place right after COMPUTE_WALKER. Accumulating can be problematic when
you want to dispatch concurrent compute shaders that do not need any
cache flush interaction (a typical example is the internal
simple_shader framework).
Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Fixes: 3e0ad0176b ("anv: Emit state cache invalidation after every compute dispatch")
Reviewed-by: Alyssa Rosenzweig <alyssa.rosenzweig@intel.com>
Reviewed-by: Sagar Ghuge <sagar.ghuge@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/38306>
This commit is contained in:
parent
0c31313b6e
commit
c478b6355a
7 changed files with 46 additions and 38 deletions
|
|
@ -464,6 +464,15 @@ iris_blorp_exec_blitter(struct blorp_batch *blorp_batch,
|
|||
|
||||
iris_bo_bump_seqno(params->dst.addr.buffer, batch->next_seqno,
|
||||
IRIS_DOMAIN_OTHER_WRITE);
|
||||
|
||||
/*
|
||||
* TODO: Add INTEL_NEEDS_WA_14025112257 check once HSD is propagated for all
|
||||
* other impacted platforms.
|
||||
*/
|
||||
if (batch->screen->devinfo->ver >= 20 && batch->name == IRIS_BATCH_COMPUTE) {
|
||||
iris_emit_pipe_control_flush(batch, "WA_14025112257",
|
||||
PIPE_CONTROL_STATE_CACHE_INVALIDATE);
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
|
|
|
|||
|
|
@ -9951,6 +9951,16 @@ iris_emit_raw_pipe_control(struct iris_batch *batch,
|
|||
}
|
||||
#endif
|
||||
|
||||
#if GFX_VER >= 12
|
||||
/* BSpec 47112 (xe), 56551 (xe2): Instruction_PIPE_CONTROL (ComputeCS):
|
||||
* SW must follow below programming restrictions when programming
|
||||
* PIPE_CONTROL command:
|
||||
* "Command Streamer Stall Enable" must be always set.
|
||||
*/
|
||||
if (batch->name == IRIS_BATCH_COMPUTE)
|
||||
flags |= PIPE_CONTROL_CS_STALL;
|
||||
#endif
|
||||
|
||||
/* The "L3 Read Only Cache Invalidation Bit" docs say it "controls the
|
||||
* invalidation of the Geometry streams cached in L3 cache at the top
|
||||
* of the pipe". In other words, index & vertex data that gets cached
|
||||
|
|
|
|||
|
|
@ -1831,25 +1831,6 @@ blorp_exec_compute(struct blorp_batch *batch, const struct blorp_params *params)
|
|||
blorp_emit(batch, GENX(COMPUTE_WALKER), cw) {
|
||||
cw.body = body;
|
||||
}
|
||||
|
||||
/*
|
||||
* TODO: Add INTEL_NEEDS_WA_14025112257 check once HSD is propagated for all
|
||||
* other impacted platforms.
|
||||
*
|
||||
* BSpec 47112 (xe), 56551 (xe2): Instruction_PIPE_CONTROL (ComputeCS):
|
||||
* SW must follow below programming restrictions when programming
|
||||
* PIPE_CONTROL command:
|
||||
*
|
||||
* "Command Streamer Stall Enable" must be always set.
|
||||
* ...
|
||||
*/
|
||||
if (devinfo->ver >= 20) {
|
||||
blorp_emit(batch, GENX(PIPE_CONTROL), pc) {
|
||||
pc.CommandStreamerStallEnable =
|
||||
batch->flags & BLORP_BATCH_COMPUTE_ENGINE;
|
||||
pc.StateCacheInvalidationEnable = true;
|
||||
}
|
||||
}
|
||||
#else
|
||||
|
||||
/* The MEDIA_VFE_STATE documentation for Gfx8+ says:
|
||||
|
|
|
|||
|
|
@ -174,19 +174,21 @@ genX(cmd_buffer_set_coarse_pixel_active)(struct anv_cmd_buffer *cmd_buffer,
|
|||
#endif
|
||||
}
|
||||
|
||||
/*
|
||||
* TODO: Add INTEL_NEEDS_WA_14025112257 check once HSD is propagated for all
|
||||
* other impacted platforms.
|
||||
*/
|
||||
static inline void
|
||||
genX(cmd_buffer_state_cache_inval_wa_14025112257)(
|
||||
struct anv_cmd_buffer *cmd_buffer)
|
||||
genX(cmd_buffer_post_dispatch_wa)(struct anv_cmd_buffer *cmd_buffer)
|
||||
{
|
||||
/* TODO: Add INTEL_NEEDS_WA_14025112257 check once HSD is propagated for all
|
||||
* other impacted platforms.
|
||||
*/
|
||||
if (cmd_buffer->device->info->ver >= 20 &&
|
||||
anv_cmd_buffer_is_compute_queue(cmd_buffer)) {
|
||||
anv_add_pending_pipe_bits(cmd_buffer,
|
||||
ANV_PIPE_STATE_CACHE_INVALIDATE_BIT,
|
||||
"WA_14025112257");
|
||||
enum anv_pipe_bits emitted_bits = 0;
|
||||
genX(emit_apply_pipe_flushes)(&cmd_buffer->batch,
|
||||
cmd_buffer->device,
|
||||
cmd_buffer->state.current_pipeline,
|
||||
ANV_PIPE_STATE_CACHE_INVALIDATE_BIT,
|
||||
&emitted_bits);
|
||||
cmd_buffer->state.pending_pipe_bits &= ~emitted_bits;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -458,6 +458,8 @@ blorp_exec_on_compute(struct blorp_batch *batch,
|
|||
cmd_buffer->state.descriptors_dirty |= VK_SHADER_STAGE_COMPUTE_BIT;
|
||||
cmd_buffer->state.push_constants_dirty |= VK_SHADER_STAGE_COMPUTE_BIT;
|
||||
cmd_buffer->state.compute.pipeline_dirty = true;
|
||||
|
||||
genX(cmd_buffer_post_dispatch_wa)(cmd_buffer);
|
||||
}
|
||||
|
||||
static void
|
||||
|
|
|
|||
|
|
@ -477,7 +477,7 @@ emit_indirect_compute_walker(struct anv_cmd_buffer *cmd_buffer,
|
|||
indirect_addr.bo, 0),
|
||||
);
|
||||
|
||||
genX(cmd_buffer_state_cache_inval_wa_14025112257)(cmd_buffer);
|
||||
genX(cmd_buffer_post_dispatch_wa)(cmd_buffer);
|
||||
}
|
||||
|
||||
static inline void
|
||||
|
|
@ -545,7 +545,7 @@ emit_compute_walker(struct anv_cmd_buffer *cmd_buffer,
|
|||
#endif
|
||||
);
|
||||
|
||||
genX(cmd_buffer_state_cache_inval_wa_14025112257)(cmd_buffer);
|
||||
genX(cmd_buffer_post_dispatch_wa)(cmd_buffer);
|
||||
}
|
||||
|
||||
#else /* #if GFX_VERx10 >= 125 */
|
||||
|
|
@ -1325,7 +1325,7 @@ cmd_buffer_trace_rays(struct anv_cmd_buffer *cmd_buffer,
|
|||
.body = body,
|
||||
);
|
||||
|
||||
genX(cmd_buffer_state_cache_inval_wa_14025112257)(cmd_buffer);
|
||||
genX(cmd_buffer_post_dispatch_wa)(cmd_buffer);
|
||||
|
||||
trace_intel_end_rays(&cmd_buffer->trace,
|
||||
params->launch_size[0],
|
||||
|
|
|
|||
|
|
@ -667,13 +667,17 @@ genX(emit_simple_shader_dispatch)(struct anv_simple_shader *state,
|
|||
cw.body = body;
|
||||
}
|
||||
|
||||
/* TODO: switch to use INTEL_NEEDS_WA_14025112257 */
|
||||
if (device->info->ver >= 20 &&
|
||||
batch->engine_class == INTEL_ENGINE_CLASS_COMPUTE) {
|
||||
enum anv_pipe_bits emitted_bits = 0;
|
||||
genX(emit_apply_pipe_flushes)(batch, device, GPGPU,
|
||||
ANV_PIPE_STATE_CACHE_INVALIDATE_BIT,
|
||||
&emitted_bits);
|
||||
if (state->cmd_buffer) {
|
||||
genX(cmd_buffer_post_dispatch_wa)(state->cmd_buffer);
|
||||
} else {
|
||||
/* TODO: switch to use INTEL_NEEDS_WA_14025112257 */
|
||||
if (device->info->ver >= 20 &&
|
||||
batch->engine_class == INTEL_ENGINE_CLASS_COMPUTE) {
|
||||
enum anv_pipe_bits emitted_bits = 0;
|
||||
genX(emit_apply_pipe_flushes)(batch, device, GPGPU,
|
||||
ANV_PIPE_STATE_CACHE_INVALIDATE_BIT,
|
||||
&emitted_bits);
|
||||
}
|
||||
}
|
||||
|
||||
#else /* GFX_VERx10 < 125 */
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue