anv: remove use of emit_apply_pipe_flushes() in various helpers

For a number of workarounds and special cases we want to emit a
PIPE_CONTROL, not a RESOURCE_BARRIER. emit_apply_pipe_flushes() should be
used mostly for application barriers.

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Reviewed-by: Tapani Pälli <tapani.palli@intel.com>
Reviewed-by: Caio Oliveira <caio.oliveira@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/38707>
This commit is contained in:
Lionel Landwerlin 2025-11-25 13:53:20 +02:00 committed by Marge Bot
parent d37a888a9b
commit 4e8a25cf6f
8 changed files with 52 additions and 65 deletions

View file

@ -108,12 +108,6 @@ void genX(batch_emit_vertex_input)(struct anv_batch *batch,
struct anv_shader *shader,
const struct vk_vertex_input_state *vi);
enum anv_pipe_bits
genX(emit_apply_pipe_flushes)(struct anv_batch *batch,
struct anv_device *device,
uint32_t current_pipeline,
enum anv_pipe_bits bits,
enum anv_pipe_bits *emitted_flush_bits);
void
genX(invalidate_aux_map)(struct anv_batch *batch,
struct anv_device *device,
@ -174,24 +168,6 @@ genX(cmd_buffer_set_coarse_pixel_active)(struct anv_cmd_buffer *cmd_buffer,
#endif
}
/* Post-dispatch workaround helper.
 *
 * When the device reports ver >= 20 and the command buffer runs on a compute
 * queue, this requests ANV_PIPE_STATE_CACHE_INVALIDATE_BIT through
 * genX(emit_apply_pipe_flushes)() on the command buffer's batch, then clears
 * the bits reported back in emitted_bits from
 * cmd_buffer->state.pending_pipe_bits. In every other case it does nothing.
 */
static inline void
genX(cmd_buffer_post_dispatch_wa)(struct anv_cmd_buffer *cmd_buffer)
{
/* TODO: Add INTEL_NEEDS_WA_14025112257 check once HSD is propagated for all
* other impacted platforms.
*/
if (cmd_buffer->device->info->ver >= 20 &&
anv_cmd_buffer_is_compute_queue(cmd_buffer)) {
/* Collects the bits the flush helper reports as emitted. */
enum anv_pipe_bits emitted_bits = 0;
genX(emit_apply_pipe_flushes)(&cmd_buffer->batch,
cmd_buffer->device,
cmd_buffer->state.current_pipeline,
ANV_PIPE_STATE_CACHE_INVALIDATE_BIT,
&emitted_bits);
/* Drop the emitted bits from the pending set. */
cmd_buffer->state.pending_pipe_bits &= ~emitted_bits;
}
}
void
genX(setup_autostrip_state)(struct anv_cmd_buffer *cmd_buffer, bool enable);
@ -200,7 +176,8 @@ void genX(emit_so_memcpy_init)(struct anv_memcpy_state *state,
struct anv_cmd_buffer *cmd_buffer,
struct anv_batch *batch);
void genX(emit_so_memcpy_fini)(struct anv_memcpy_state *state);
void genX(emit_so_memcpy_fini)(struct anv_memcpy_state *state,
bool wait_completion);
void genX(emit_so_memcpy_end)(struct anv_memcpy_state *state);
@ -538,3 +515,19 @@ void genX(write_rt_shader_group)(struct anv_device *device,
uint32_t genX(shader_cmd_size)(struct anv_device *device,
mesa_shader_stage stage);
/* Post-dispatch workaround helper.
 *
 * When the device reports ver >= 20 and the command buffer runs on a compute
 * queue, this calls genX(batch_emit_pipe_control)() on the command buffer's
 * batch with ANV_PIPE_STATE_CACHE_INVALIDATE_BIT for the current pipeline.
 * In every other case it does nothing. It does not touch
 * cmd_buffer->state.pending_pipe_bits.
 */
static inline void
genX(cmd_buffer_post_dispatch_wa)(struct anv_cmd_buffer *cmd_buffer)
{
/* TODO: Add INTEL_NEEDS_WA_14025112257 check once HSD is propagated for all
* other impacted platforms.
*/
if (cmd_buffer->device->info->ver >= 20 &&
anv_cmd_buffer_is_compute_queue(cmd_buffer)) {
/* NOTE(review): the trailing string is presumably a reason/debug label
* for the emitted pipe control - confirm against
* genX(batch_emit_pipe_control).
*/
genX(batch_emit_pipe_control)(&cmd_buffer->batch,
cmd_buffer->device->info,
cmd_buffer->state.current_pipeline,
ANV_PIPE_STATE_CACHE_INVALIDATE_BIT,
"Wa_14025112257");
}
}

View file

@ -219,7 +219,6 @@ anv_device_utrace_flush_cmd_buffers(struct anv_queue *queue,
anv_device_utrace_emit_gfx_copy_buffer);
}
}
anv_genX(device->info, emit_so_memcpy_fini)(&submit->memcpy_state);
trace_intel_end_trace_copy_cb(&submit->ds.trace, batch, num_traces);

View file

@ -330,12 +330,11 @@ blorp_exec_on_render(struct blorp_batch *batch,
hw_state->ds_write_state = blorp_ds_state;
BITSET_SET(hw_state->emit_dirty, ANV_GFX_STATE_WA_18019816803);
/* Add the stall that will flush prior to the blorp operation by
* genX(cmd_buffer_apply_pipe_flushes)
*/
anv_add_pending_pipe_bits(cmd_buffer,
ANV_PIPE_PSS_STALL_SYNC_BIT,
"Wa_18019816803");
genX(batch_emit_pipe_control)(&cmd_buffer->batch,
cmd_buffer->device->info,
cmd_buffer->state.current_pipeline,
ANV_PIPE_PSS_STALL_SYNC_BIT,
"Wa_18019816803");
}
}
#endif

View file

@ -1632,7 +1632,7 @@ genX(invalidate_aux_map)(struct anv_batch *batch,
#endif
}
ALWAYS_INLINE enum anv_pipe_bits
ALWAYS_INLINE static enum anv_pipe_bits
genX(emit_apply_pipe_flushes)(struct anv_batch *batch,
struct anv_device *device,
uint32_t current_pipeline,
@ -3630,12 +3630,7 @@ genX(CmdExecuteCommands)(
src_state.alloc_size);
}
}
genX(emit_so_memcpy_fini)(&memcpy_state);
anv_add_pending_pipe_bits(container,
ANV_PIPE_CS_STALL_BIT | ANV_PIPE_STALL_AT_SCOREBOARD_BIT,
"Wait for primary->secondary RP surface state copies");
genX(cmd_buffer_apply_pipe_flushes)(container);
genX(emit_so_memcpy_fini)(&memcpy_state, true);
if (container->vk.pool->flags & VK_COMMAND_POOL_CREATE_PROTECTED_BIT)
genX(cmd_buffer_set_protected_memory)(container, true);
@ -3788,7 +3783,7 @@ genX(CmdExecuteCommands)(
&memcpy_state,
anv_device_utrace_emit_gfx_copy_buffer);
}
genX(emit_so_memcpy_fini)(&memcpy_state);
genX(emit_so_memcpy_fini)(&memcpy_state, true);
trace_intel_end_trace_copy(&container->trace, num_traces);

View file

@ -46,15 +46,15 @@ genX(cmd_buffer_flush_generated_draws)(struct anv_cmd_buffer *cmd_buffer)
struct anv_batch *batch = &cmd_buffer->generation.batch;
/* Wait for all the generation vertex shader to generate the commands. */
genX(emit_apply_pipe_flushes)(batch,
cmd_buffer->device,
genX(batch_emit_pipe_control)(batch,
cmd_buffer->device->info,
_3D,
#if GFX_VER == 9
ANV_PIPE_VF_CACHE_INVALIDATE_BIT |
#endif
ANV_PIPE_DATA_CACHE_FLUSH_BIT |
ANV_PIPE_CS_STALL_BIT,
NULL /* emitted_bits */);
"generated draw flush");
#if GFX_VER >= 12
anv_batch_emit(batch, GENX(MI_ARB_CHECK), arb) {

View file

@ -3962,9 +3962,10 @@ cmd_buffer_gfx_state_emission(struct anv_cmd_buffer *cmd_buffer)
#if INTEL_WA_18019816803_GFX_VER
if (IS_DIRTY(WA_18019816803)) {
genx_batch_emit_pipe_control(batch, device->info,
cmd_buffer->state.current_pipeline,
ANV_PIPE_PSS_STALL_SYNC_BIT);
genX(batch_emit_pipe_control)(batch, device->info,
cmd_buffer->state.current_pipeline,
ANV_PIPE_PSS_STALL_SYNC_BIT,
"Wa_18019816803");
}
#endif

View file

@ -318,11 +318,14 @@ genX(emit_so_memcpy_init)(struct anv_memcpy_state *state,
}
void
genX(emit_so_memcpy_fini)(struct anv_memcpy_state *state)
genX(emit_so_memcpy_fini)(struct anv_memcpy_state *state,
bool wait_completion)
{
genX(emit_apply_pipe_flushes)(state->batch, state->device, _3D,
ANV_PIPE_END_OF_PIPE_SYNC_BIT,
NULL);
if (wait_completion) {
genX(batch_emit_pipe_control)(state->batch, state->device->info, _3D,
ANV_PIPE_END_OF_PIPE_SYNC_BIT,
"Post GPU memcpy wait");
}
if (state->cmd_buffer) {
/* Flag all the instructions emitted by the memcpy. */
@ -375,7 +378,11 @@ genX(emit_so_memcpy_fini)(struct anv_memcpy_state *state)
void
genX(emit_so_memcpy_end)(struct anv_memcpy_state *state)
{
#if INTEL_WA_16013994831_GFX_VER
genX(batch_emit_pipe_control)(state->batch, state->device->info, _3D,
ANV_PIPE_END_OF_PIPE_SYNC_BIT,
"Post GPU memcpy wait");
#if INTEL_WA_16013994831_GFX_VER
/* Turn preemption back on when we're done */
if (intel_needs_workaround(state->device->info, 16013994831))
genX(batch_set_preemption)(state->batch, state->device, _3D, true);
@ -396,10 +403,10 @@ genX(emit_so_memcpy)(struct anv_memcpy_state *state,
anv_gfx8_9_vb_cache_range_needs_workaround(&state->vb_bound,
&state->vb_dirty,
src, size)) {
genX(emit_apply_pipe_flushes)(state->batch, state->device, _3D,
genX(batch_emit_pipe_control)(state->batch, state->device->info, _3D,
ANV_PIPE_CS_STALL_BIT |
ANV_PIPE_VF_CACHE_INVALIDATE_BIT,
NULL);
"Gfx9 VB cache workaround");
memset(&state->vb_dirty, 0, sizeof(state->vb_dirty));
}

View file

@ -673,10 +673,9 @@ genX(emit_simple_shader_dispatch)(struct anv_simple_shader *state,
/* TODO: switch to use INTEL_NEEDS_WA_14025112257 */
if (device->info->ver >= 20 &&
batch->engine_class == INTEL_ENGINE_CLASS_COMPUTE) {
enum anv_pipe_bits emitted_bits = 0;
genX(emit_apply_pipe_flushes)(batch, device, GPGPU,
genX(batch_emit_pipe_control)(batch, devinfo, GPGPU,
ANV_PIPE_STATE_CACHE_INVALIDATE_BIT,
&emitted_bits);
"Wa_14025112257");
}
}
@ -693,15 +692,9 @@ genX(emit_simple_shader_dispatch)(struct anv_simple_shader *state,
* these scoreboard related states, a MEDIA_STATE_FLUSH is
* sufficient."
*/
enum anv_pipe_bits emitted_bits = 0;
genX(emit_apply_pipe_flushes)(batch, device, GPGPU, ANV_PIPE_CS_STALL_BIT,
&emitted_bits);
/* If we have a command buffer allocated with the emission, update the
* pending bits.
*/
if (state->cmd_buffer)
anv_cmd_buffer_update_pending_query_bits(state->cmd_buffer, emitted_bits);
genX(batch_emit_pipe_control)(batch, devinfo, GPGPU,
ANV_PIPE_CS_STALL_BIT,
"pre MEDIA_VFE_STATE");
anv_batch_emit(batch, GENX(MEDIA_VFE_STATE), vfe) {
vfe.StackSize = 0;