diff --git a/src/gallium/drivers/iris/iris_blorp.c b/src/gallium/drivers/iris/iris_blorp.c index 443076b234b..55cc78aad98 100644 --- a/src/gallium/drivers/iris/iris_blorp.c +++ b/src/gallium/drivers/iris/iris_blorp.c @@ -464,6 +464,15 @@ iris_blorp_exec_blitter(struct blorp_batch *blorp_batch, iris_bo_bump_seqno(params->dst.addr.buffer, batch->next_seqno, IRIS_DOMAIN_OTHER_WRITE); + + /* + * TODO: Add INTEL_NEEDS_WA_14025112257 check once HSD is propagated for all + * other impacted platforms. + */ + if (batch->screen->devinfo->ver >= 20 && batch->name == IRIS_BATCH_COMPUTE) { + iris_emit_pipe_control_flush(batch, "WA_14025112257", + PIPE_CONTROL_STATE_CACHE_INVALIDATE); + } } static void diff --git a/src/gallium/drivers/iris/iris_state.c b/src/gallium/drivers/iris/iris_state.c index e1332d9ac48..eef7df7a609 100644 --- a/src/gallium/drivers/iris/iris_state.c +++ b/src/gallium/drivers/iris/iris_state.c @@ -9951,6 +9951,16 @@ iris_emit_raw_pipe_control(struct iris_batch *batch, } #endif +#if GFX_VER >= 12 + /* BSpec 47112 (xe), 56551 (xe2): Instruction_PIPE_CONTROL (ComputeCS): + * SW must follow below programming restrictions when programming + * PIPE_CONTROL command: + * "Command Streamer Stall Enable" must be always set. + */ + if (batch->name == IRIS_BATCH_COMPUTE) + flags |= PIPE_CONTROL_CS_STALL; +#endif + /* The "L3 Read Only Cache Invalidation Bit" docs say it "controls the * invalidation of the Geometry streams cached in L3 cache at the top * of the pipe". 
In other words, index & vertex data that gets cached diff --git a/src/intel/blorp/blorp_genX_exec_brw.h b/src/intel/blorp/blorp_genX_exec_brw.h index 76617fd02a6..aa959f9cca7 100644 --- a/src/intel/blorp/blorp_genX_exec_brw.h +++ b/src/intel/blorp/blorp_genX_exec_brw.h @@ -1831,25 +1831,6 @@ blorp_exec_compute(struct blorp_batch *batch, const struct blorp_params *params) blorp_emit(batch, GENX(COMPUTE_WALKER), cw) { cw.body = body; } - - /* - * TDOD: Add INTEL_NEEDS_WA_14025112257 check once HSD is propogated for all - * other impacted platforms. - * - * BSpec 47112 (xe), 56551 (xe2): Instruction_PIPE_CONTROL (ComputeCS): - * SW must follow below programming restrictions when programming - * PIPE_CONTROL command: - * - * "Command Streamer Stall Enable" must be always set. - * ... - */ - if (devinfo->ver >= 20) { - blorp_emit(batch, GENX(PIPE_CONTROL), pc) { - pc.CommandStreamerStallEnable = - batch->flags & BLORP_BATCH_COMPUTE_ENGINE; - pc.StateCacheInvalidationEnable = true; - } - } #else /* The MEDIA_VFE_STATE documentation for Gfx8+ says: diff --git a/src/intel/vulkan/anv_genX.h b/src/intel/vulkan/anv_genX.h index d47e1473912..314d059e93d 100644 --- a/src/intel/vulkan/anv_genX.h +++ b/src/intel/vulkan/anv_genX.h @@ -174,19 +174,21 @@ genX(cmd_buffer_set_coarse_pixel_active)(struct anv_cmd_buffer *cmd_buffer, #endif } -/* - * TDOD: Add INTEL_NEEDS_WA_14025112257 check once HSD is propogated for all - * other impacted platforms. - */ static inline void -genX(cmd_buffer_state_cache_inval_wa_14025112257)( - struct anv_cmd_buffer *cmd_buffer) +genX(cmd_buffer_post_dispatch_wa)(struct anv_cmd_buffer *cmd_buffer) { + /* TODO: Add INTEL_NEEDS_WA_14025112257 check once HSD is propagated for all + * other impacted platforms. 
+ */ if (cmd_buffer->device->info->ver >= 20 && anv_cmd_buffer_is_compute_queue(cmd_buffer)) { - anv_add_pending_pipe_bits(cmd_buffer, - ANV_PIPE_STATE_CACHE_INVALIDATE_BIT, - "WA_14025112257"); + enum anv_pipe_bits emitted_bits = 0; + genX(emit_apply_pipe_flushes)(&cmd_buffer->batch, + cmd_buffer->device, + cmd_buffer->state.current_pipeline, + ANV_PIPE_STATE_CACHE_INVALIDATE_BIT, + &emitted_bits); + cmd_buffer->state.pending_pipe_bits &= ~emitted_bits; } } diff --git a/src/intel/vulkan/genX_blorp_exec.c b/src/intel/vulkan/genX_blorp_exec.c index 4a75f77b621..a1fa31557c5 100644 --- a/src/intel/vulkan/genX_blorp_exec.c +++ b/src/intel/vulkan/genX_blorp_exec.c @@ -458,6 +458,8 @@ blorp_exec_on_compute(struct blorp_batch *batch, cmd_buffer->state.descriptors_dirty |= VK_SHADER_STAGE_COMPUTE_BIT; cmd_buffer->state.push_constants_dirty |= VK_SHADER_STAGE_COMPUTE_BIT; cmd_buffer->state.compute.pipeline_dirty = true; + + genX(cmd_buffer_post_dispatch_wa)(cmd_buffer); } static void diff --git a/src/intel/vulkan/genX_cmd_compute.c b/src/intel/vulkan/genX_cmd_compute.c index 192396d8ecf..cbe677fc668 100644 --- a/src/intel/vulkan/genX_cmd_compute.c +++ b/src/intel/vulkan/genX_cmd_compute.c @@ -477,7 +477,7 @@ emit_indirect_compute_walker(struct anv_cmd_buffer *cmd_buffer, indirect_addr.bo, 0), ); - genX(cmd_buffer_state_cache_inval_wa_14025112257)(cmd_buffer); + genX(cmd_buffer_post_dispatch_wa)(cmd_buffer); } static inline void @@ -545,7 +545,7 @@ emit_compute_walker(struct anv_cmd_buffer *cmd_buffer, #endif ); - genX(cmd_buffer_state_cache_inval_wa_14025112257)(cmd_buffer); + genX(cmd_buffer_post_dispatch_wa)(cmd_buffer); } #else /* #if GFX_VERx10 >= 125 */ @@ -1325,7 +1325,7 @@ cmd_buffer_trace_rays(struct anv_cmd_buffer *cmd_buffer, .body = body, ); - genX(cmd_buffer_state_cache_inval_wa_14025112257)(cmd_buffer); + genX(cmd_buffer_post_dispatch_wa)(cmd_buffer); trace_intel_end_rays(&cmd_buffer->trace, params->launch_size[0], diff --git 
a/src/intel/vulkan/genX_simple_shader.c b/src/intel/vulkan/genX_simple_shader.c index 7f4bbcd1df5..5ac0160121f 100644 --- a/src/intel/vulkan/genX_simple_shader.c +++ b/src/intel/vulkan/genX_simple_shader.c @@ -667,13 +667,17 @@ genX(emit_simple_shader_dispatch)(struct anv_simple_shader *state, cw.body = body; } - /* TODO: switch to use INTEL_NEEDS_WA_14025112257 */ - if (device->info->ver >= 20 && - batch->engine_class == INTEL_ENGINE_CLASS_COMPUTE) { - enum anv_pipe_bits emitted_bits = 0; - genX(emit_apply_pipe_flushes)(batch, device, GPGPU, - ANV_PIPE_STATE_CACHE_INVALIDATE_BIT, - &emitted_bits); + if (state->cmd_buffer) { + genX(cmd_buffer_post_dispatch_wa)(state->cmd_buffer); + } else { + /* TODO: switch to use INTEL_NEEDS_WA_14025112257 */ + if (device->info->ver >= 20 && + batch->engine_class == INTEL_ENGINE_CLASS_COMPUTE) { + enum anv_pipe_bits emitted_bits = 0; + genX(emit_apply_pipe_flushes)(batch, device, GPGPU, + ANV_PIPE_STATE_CACHE_INVALIDATE_BIT, + &emitted_bits); + } } #else /* GFX_VERx10 < 125 */