diff --git a/src/intel/vulkan/anv_genX.h b/src/intel/vulkan/anv_genX.h index c905e664a5f..5234823b94a 100644 --- a/src/intel/vulkan/anv_genX.h +++ b/src/intel/vulkan/anv_genX.h @@ -108,12 +108,6 @@ void genX(batch_emit_vertex_input)(struct anv_batch *batch, struct anv_shader *shader, const struct vk_vertex_input_state *vi); -enum anv_pipe_bits -genX(emit_apply_pipe_flushes)(struct anv_batch *batch, - struct anv_device *device, - uint32_t current_pipeline, - enum anv_pipe_bits bits, - enum anv_pipe_bits *emitted_flush_bits); void genX(invalidate_aux_map)(struct anv_batch *batch, struct anv_device *device, @@ -174,24 +168,6 @@ genX(cmd_buffer_set_coarse_pixel_active)(struct anv_cmd_buffer *cmd_buffer, #endif } -static inline void -genX(cmd_buffer_post_dispatch_wa)(struct anv_cmd_buffer *cmd_buffer) -{ - /* TODO: Add INTEL_NEEDS_WA_14025112257 check once HSD is propogated for all - * other impacted platforms. - */ - if (cmd_buffer->device->info->ver >= 20 && - anv_cmd_buffer_is_compute_queue(cmd_buffer)) { - enum anv_pipe_bits emitted_bits = 0; - genX(emit_apply_pipe_flushes)(&cmd_buffer->batch, - cmd_buffer->device, - cmd_buffer->state.current_pipeline, - ANV_PIPE_STATE_CACHE_INVALIDATE_BIT, - &emitted_bits); - cmd_buffer->state.pending_pipe_bits &= ~emitted_bits; - } -} - void genX(setup_autostrip_state)(struct anv_cmd_buffer *cmd_buffer, bool enable); @@ -200,7 +176,8 @@ void genX(emit_so_memcpy_init)(struct anv_memcpy_state *state, struct anv_cmd_buffer *cmd_buffer, struct anv_batch *batch); -void genX(emit_so_memcpy_fini)(struct anv_memcpy_state *state); +void genX(emit_so_memcpy_fini)(struct anv_memcpy_state *state, + bool wait_completion); void genX(emit_so_memcpy_end)(struct anv_memcpy_state *state); @@ -538,3 +515,19 @@ void genX(write_rt_shader_group)(struct anv_device *device, uint32_t genX(shader_cmd_size)(struct anv_device *device, mesa_shader_stage stage); + +static inline void +genX(cmd_buffer_post_dispatch_wa)(struct anv_cmd_buffer *cmd_buffer) +{ + /* 
TODO: Add INTEL_NEEDS_WA_14025112257 check once HSD is propagated for all + * other impacted platforms. + */ + if (cmd_buffer->device->info->ver >= 20 && + anv_cmd_buffer_is_compute_queue(cmd_buffer)) { + genX(batch_emit_pipe_control)(&cmd_buffer->batch, + cmd_buffer->device->info, + cmd_buffer->state.current_pipeline, + ANV_PIPE_STATE_CACHE_INVALIDATE_BIT, + "Wa_14025112257"); + } +} diff --git a/src/intel/vulkan/anv_utrace.c b/src/intel/vulkan/anv_utrace.c index d7a43a6bbbf..11f7d6bb3cb 100644 --- a/src/intel/vulkan/anv_utrace.c +++ b/src/intel/vulkan/anv_utrace.c @@ -219,7 +219,6 @@ anv_device_utrace_flush_cmd_buffers(struct anv_queue *queue, anv_device_utrace_emit_gfx_copy_buffer); } } - anv_genX(device->info, emit_so_memcpy_fini)(&submit->memcpy_state); trace_intel_end_trace_copy_cb(&submit->ds.trace, batch, num_traces); diff --git a/src/intel/vulkan/genX_blorp_exec.c b/src/intel/vulkan/genX_blorp_exec.c index a1fa31557c5..68147936e56 100644 --- a/src/intel/vulkan/genX_blorp_exec.c +++ b/src/intel/vulkan/genX_blorp_exec.c @@ -330,12 +330,11 @@ blorp_exec_on_render(struct blorp_batch *batch, hw_state->ds_write_state = blorp_ds_state; BITSET_SET(hw_state->emit_dirty, ANV_GFX_STATE_WA_18019816803); - /* Add the stall that will flush prior to the blorp operation by - * genX(cmd_buffer_apply_pipe_flushes) - */ - anv_add_pending_pipe_bits(cmd_buffer, - ANV_PIPE_PSS_STALL_SYNC_BIT, - "Wa_18019816803"); + genX(batch_emit_pipe_control)(&cmd_buffer->batch, + cmd_buffer->device->info, + cmd_buffer->state.current_pipeline, + ANV_PIPE_PSS_STALL_SYNC_BIT, + "Wa_18019816803"); } } #endif diff --git a/src/intel/vulkan/genX_cmd_buffer.c b/src/intel/vulkan/genX_cmd_buffer.c index 8b15334708b..0cebb6a4e1e 100644 --- a/src/intel/vulkan/genX_cmd_buffer.c +++ b/src/intel/vulkan/genX_cmd_buffer.c @@ -1632,7 +1632,7 @@ genX(invalidate_aux_map)(struct anv_batch *batch, #endif } -ALWAYS_INLINE enum anv_pipe_bits +ALWAYS_INLINE static enum anv_pipe_bits 
genX(emit_apply_pipe_flushes)(struct anv_batch *batch, struct anv_device *device, uint32_t current_pipeline, @@ -3630,12 +3630,7 @@ genX(CmdExecuteCommands)( src_state.alloc_size); } } - genX(emit_so_memcpy_fini)(&memcpy_state); - - anv_add_pending_pipe_bits(container, - ANV_PIPE_CS_STALL_BIT | ANV_PIPE_STALL_AT_SCOREBOARD_BIT, - "Wait for primary->secondary RP surface state copies"); - genX(cmd_buffer_apply_pipe_flushes)(container); + genX(emit_so_memcpy_fini)(&memcpy_state, true); if (container->vk.pool->flags & VK_COMMAND_POOL_CREATE_PROTECTED_BIT) genX(cmd_buffer_set_protected_memory)(container, true); @@ -3788,7 +3783,7 @@ genX(CmdExecuteCommands)( &memcpy_state, anv_device_utrace_emit_gfx_copy_buffer); } - genX(emit_so_memcpy_fini)(&memcpy_state); + genX(emit_so_memcpy_fini)(&memcpy_state, true); trace_intel_end_trace_copy(&container->trace, num_traces); diff --git a/src/intel/vulkan/genX_cmd_draw_generated_flush.h b/src/intel/vulkan/genX_cmd_draw_generated_flush.h index 2240d1e1918..e61c39c483b 100644 --- a/src/intel/vulkan/genX_cmd_draw_generated_flush.h +++ b/src/intel/vulkan/genX_cmd_draw_generated_flush.h @@ -46,15 +46,15 @@ genX(cmd_buffer_flush_generated_draws)(struct anv_cmd_buffer *cmd_buffer) struct anv_batch *batch = &cmd_buffer->generation.batch; /* Wait for all the generation vertex shader to generate the commands. 
*/ - genX(emit_apply_pipe_flushes)(batch, - cmd_buffer->device, + genX(batch_emit_pipe_control)(batch, + cmd_buffer->device->info, _3D, #if GFX_VER == 9 ANV_PIPE_VF_CACHE_INVALIDATE_BIT | #endif ANV_PIPE_DATA_CACHE_FLUSH_BIT | ANV_PIPE_CS_STALL_BIT, - NULL /* emitted_bits */); + "generated draw flush"); #if GFX_VER >= 12 anv_batch_emit(batch, GENX(MI_ARB_CHECK), arb) { diff --git a/src/intel/vulkan/genX_gfx_state.c b/src/intel/vulkan/genX_gfx_state.c index 9c99df352c3..6118da99875 100644 --- a/src/intel/vulkan/genX_gfx_state.c +++ b/src/intel/vulkan/genX_gfx_state.c @@ -3962,9 +3962,10 @@ cmd_buffer_gfx_state_emission(struct anv_cmd_buffer *cmd_buffer) #if INTEL_WA_18019816803_GFX_VER if (IS_DIRTY(WA_18019816803)) { - genx_batch_emit_pipe_control(batch, device->info, - cmd_buffer->state.current_pipeline, - ANV_PIPE_PSS_STALL_SYNC_BIT); + genX(batch_emit_pipe_control)(batch, device->info, + cmd_buffer->state.current_pipeline, + ANV_PIPE_PSS_STALL_SYNC_BIT, + "Wa_18019816803"); } #endif diff --git a/src/intel/vulkan/genX_gpu_memcpy.c b/src/intel/vulkan/genX_gpu_memcpy.c index c2696ecb78b..6ec55eb89f5 100644 --- a/src/intel/vulkan/genX_gpu_memcpy.c +++ b/src/intel/vulkan/genX_gpu_memcpy.c @@ -318,11 +318,14 @@ genX(emit_so_memcpy_init)(struct anv_memcpy_state *state, } void -genX(emit_so_memcpy_fini)(struct anv_memcpy_state *state) +genX(emit_so_memcpy_fini)(struct anv_memcpy_state *state, + bool wait_completion) { - genX(emit_apply_pipe_flushes)(state->batch, state->device, _3D, - ANV_PIPE_END_OF_PIPE_SYNC_BIT, - NULL); + if (wait_completion) { + genX(batch_emit_pipe_control)(state->batch, state->device->info, _3D, + ANV_PIPE_END_OF_PIPE_SYNC_BIT, + "Post GPU memcpy wait"); + } if (state->cmd_buffer) { /* Flag all the instructions emitted by the memcpy. 
*/ @@ -375,7 +378,11 @@ genX(emit_so_memcpy_fini)(struct anv_memcpy_state *state) void genX(emit_so_memcpy_end)(struct anv_memcpy_state *state) { -#if INTEL_WA_16013994831_GFX_VER + genX(batch_emit_pipe_control)(state->batch, state->device->info, _3D, + ANV_PIPE_END_OF_PIPE_SYNC_BIT, + "Post GPU memcpy wait"); + + #if INTEL_WA_16013994831_GFX_VER /* Turn preemption back on when we're done */ if (intel_needs_workaround(state->device->info, 16013994831)) genX(batch_set_preemption)(state->batch, state->device, _3D, true); @@ -396,10 +403,10 @@ genX(emit_so_memcpy)(struct anv_memcpy_state *state, anv_gfx8_9_vb_cache_range_needs_workaround(&state->vb_bound, &state->vb_dirty, src, size)) { - genX(emit_apply_pipe_flushes)(state->batch, state->device, _3D, + genX(batch_emit_pipe_control)(state->batch, state->device->info, _3D, ANV_PIPE_CS_STALL_BIT | ANV_PIPE_VF_CACHE_INVALIDATE_BIT, - NULL); + "Gfx9 VB cache workaround"); memset(&state->vb_dirty, 0, sizeof(state->vb_dirty)); } diff --git a/src/intel/vulkan/genX_simple_shader.c b/src/intel/vulkan/genX_simple_shader.c index 507fb48b3ca..f124181a234 100644 --- a/src/intel/vulkan/genX_simple_shader.c +++ b/src/intel/vulkan/genX_simple_shader.c @@ -673,10 +673,9 @@ genX(emit_simple_shader_dispatch)(struct anv_simple_shader *state, /* TODO: switch to use INTEL_NEEDS_WA_14025112257 */ if (device->info->ver >= 20 && batch->engine_class == INTEL_ENGINE_CLASS_COMPUTE) { - enum anv_pipe_bits emitted_bits = 0; - genX(emit_apply_pipe_flushes)(batch, device, GPGPU, + genX(batch_emit_pipe_control)(batch, devinfo, GPGPU, ANV_PIPE_STATE_CACHE_INVALIDATE_BIT, - &emitted_bits); + "Wa_14025112257"); } } @@ -693,15 +692,9 @@ genX(emit_simple_shader_dispatch)(struct anv_simple_shader *state, * these scoreboard related states, a MEDIA_STATE_FLUSH is * sufficient." 
*/ - enum anv_pipe_bits emitted_bits = 0; - genX(emit_apply_pipe_flushes)(batch, device, GPGPU, ANV_PIPE_CS_STALL_BIT, - &emitted_bits); - - /* If we have a command buffer allocated with the emission, update the - * pending bits. - */ - if (state->cmd_buffer) - anv_cmd_buffer_update_pending_query_bits(state->cmd_buffer, emitted_bits); + genX(batch_emit_pipe_control)(batch, devinfo, GPGPU, + ANV_PIPE_CS_STALL_BIT, + "pre MEDIA_VFE_STATE"); anv_batch_emit(batch, GENX(MEDIA_VFE_STATE), vfe) { vfe.StackSize = 0;