diff --git a/src/intel/vulkan/anv_genX.h b/src/intel/vulkan/anv_genX.h index 7799f1780eb..06de5052bea 100644 --- a/src/intel/vulkan/anv_genX.h +++ b/src/intel/vulkan/anv_genX.h @@ -216,6 +216,7 @@ genX(ray_tracing_pipeline_emit)(struct anv_ray_tracing_pipeline *pipeline); void genX(batch_set_preemption)(struct anv_batch *batch, const struct intel_device_info *devinfo, + uint32_t current_pipeline, bool value); void @@ -224,23 +225,25 @@ genX(cmd_buffer_set_preemption)(struct anv_cmd_buffer *cmd_buffer, bool value); void genX(batch_emit_pipe_control)(struct anv_batch *batch, const struct intel_device_info *devinfo, + uint32_t current_pipeline, enum anv_pipe_bits bits, const char *reason); void genX(batch_emit_pipe_control_write)(struct anv_batch *batch, const struct intel_device_info *devinfo, + uint32_t current_pipeline, uint32_t post_sync_op, struct anv_address address, uint32_t imm_data, enum anv_pipe_bits bits, const char *reason); -#define genx_batch_emit_pipe_control(a, b, c) \ -genX(batch_emit_pipe_control) (a, b, c, __func__) +#define genx_batch_emit_pipe_control(a, b, c, d) \ +genX(batch_emit_pipe_control) (a, b, c, d, __func__) -#define genx_batch_emit_pipe_control_write(a, b, c, d, e, f) \ -genX(batch_emit_pipe_control_write) (a, b, c, d, e, f, __func__) +#define genx_batch_emit_pipe_control_write(a, b, c, d, e, f, g) \ +genX(batch_emit_pipe_control_write) (a, b, c, d, e, f, g, __func__) void genX(batch_emit_breakpoint)(struct anv_batch *batch, struct anv_device *device, diff --git a/src/intel/vulkan/genX_cmd_buffer.c b/src/intel/vulkan/genX_cmd_buffer.c index 424e917ee6a..7007b30d5cf 100644 --- a/src/intel/vulkan/genX_cmd_buffer.c +++ b/src/intel/vulkan/genX_cmd_buffer.c @@ -110,6 +110,7 @@ genX(cmd_buffer_emit_state_base_address)(struct anv_cmd_buffer *cmd_buffer) #if GFX_VERx10 >= 125 genx_batch_emit_pipe_control(&cmd_buffer->batch, cmd_buffer->device->info, + cmd_buffer->state.current_pipeline, ANV_PIPE_CS_STALL_BIT); anv_batch_emit( &cmd_buffer->batch, GENX(3DSTATE_BINDING_TABLE_POOL_ALLOC), btpa) { @@ -128,6 +129,7 @@ genX(cmd_buffer_emit_state_base_address)(struct anv_cmd_buffer *cmd_buffer) */ genx_batch_emit_pipe_control (&cmd_buffer->batch, cmd_buffer->device->info, + cmd_buffer->state.current_pipeline, #if GFX_VER >= 12 ANV_PIPE_HDC_PIPELINE_FLUSH_BIT | #else @@ -301,6 +303,7 @@ genX(cmd_buffer_emit_state_base_address)(struct anv_cmd_buffer *cmd_buffer) bits |= ANV_PIPE_CS_STALL_BIT; #endif genx_batch_emit_pipe_control(&cmd_buffer->batch, cmd_buffer->device->info, + cmd_buffer->state.current_pipeline, bits); } @@ -1331,6 +1334,7 @@ genX(cmd_buffer_config_l3)(struct anv_cmd_buffer *cmd_buffer, * which involves a first PIPE_CONTROL flush which stalls the pipeline... */ genx_batch_emit_pipe_control(&cmd_buffer->batch, cmd_buffer->device->info, + cmd_buffer->state.current_pipeline, ANV_PIPE_DATA_CACHE_FLUSH_BIT | ANV_PIPE_CS_STALL_BIT); @@ -1349,6 +1353,7 @@ genX(cmd_buffer_config_l3)(struct anv_cmd_buffer *cmd_buffer, * (see SKL HSD 2132585). */ genx_batch_emit_pipe_control(&cmd_buffer->batch, cmd_buffer->device->info, + cmd_buffer->state.current_pipeline, ANV_PIPE_TEXTURE_CACHE_INVALIDATE_BIT | ANV_PIPE_CONSTANT_CACHE_INVALIDATE_BIT | ANV_PIPE_INSTRUCTION_CACHE_INVALIDATE_BIT | @@ -1358,6 +1363,7 @@ genX(cmd_buffer_config_l3)(struct anv_cmd_buffer *cmd_buffer, * complete when the L3 configuration registers are modified. */ genx_batch_emit_pipe_control(&cmd_buffer->batch, cmd_buffer->device->info, + cmd_buffer->state.current_pipeline, ANV_PIPE_DATA_CACHE_FLUSH_BIT | ANV_PIPE_CS_STALL_BIT); @@ -1588,8 +1594,8 @@ genX(emit_apply_pipe_flushes)(struct anv_batch *batch, } /* Flush PC. */ - genx_batch_emit_pipe_control_write(batch, device->info, sync_op, addr, - 0, flush_bits); + genx_batch_emit_pipe_control_write(batch, device->info, current_pipeline, + sync_op, addr, 0, flush_bits); /* If the caller wants to know what flushes have been emitted, * provide the bits based off the PIPE_CONTROL programmed bits. @@ -1644,8 +1650,8 @@ genX(emit_apply_pipe_flushes)(struct anv_batch *batch, } /* Invalidate PC. */ - genx_batch_emit_pipe_control_write(batch, device->info, sync_op, addr, - 0, bits); + genx_batch_emit_pipe_control_write(batch, device->info, current_pipeline, + sync_op, addr, 0, bits); #if GFX_VER == 12 if ((bits & ANV_PIPE_AUX_TABLE_INVALIDATE_BIT) && device->info->has_aux_map) { @@ -2821,11 +2827,13 @@ cmd_buffer_flush_mesh_inline_data(struct anv_cmd_buffer *cmd_buffer, ALWAYS_INLINE void genX(batch_emit_pipe_control)(struct anv_batch *batch, const struct intel_device_info *devinfo, + uint32_t current_pipeline, enum anv_pipe_bits bits, const char *reason) { genX(batch_emit_pipe_control_write)(batch, devinfo, + current_pipeline, NoWrite, ANV_NULL_ADDRESS, 0, @@ -2836,6 +2844,7 @@ genX(batch_emit_pipe_control)(struct anv_batch *batch, ALWAYS_INLINE void genX(batch_emit_pipe_control_write)(struct anv_batch *batch, const struct intel_device_info *devinfo, + uint32_t current_pipeline, uint32_t post_sync_op, struct anv_address address, uint32_t imm_data, @@ -2908,6 +2917,7 @@ genX(batch_emit_pipe_control_write)(struct anv_batch *batch, void genX(batch_set_preemption)(struct anv_batch *batch, const struct intel_device_info *devinfo, + uint32_t current_pipeline, bool value) { #if GFX_VERx10 >= 120 @@ -2917,7 +2927,8 @@ genX(batch_set_preemption)(struct anv_batch *batch, } /* Wa_16013994831 - we need to insert CS_STALL and 250 noops. */ - genx_batch_emit_pipe_control(batch, devinfo, ANV_PIPE_CS_STALL_BIT); + genx_batch_emit_pipe_control(batch, devinfo, current_pipeline, + ANV_PIPE_CS_STALL_BIT); for (unsigned i = 0; i < 250; i++) anv_batch_emit(batch, GENX(MI_NOOP), noop); @@ -2932,6 +2943,7 @@ genX(cmd_buffer_set_preemption)(struct anv_cmd_buffer *cmd_buffer, bool value) return; genX(batch_set_preemption)(&cmd_buffer->batch, cmd_buffer->device->info, + cmd_buffer->state.current_pipeline, value); cmd_buffer->state.gfx.object_preemption = value; #endif @@ -3452,6 +3464,7 @@ emit_isp_disable(struct anv_cmd_buffer *cmd_buffer) { genx_batch_emit_pipe_control(&cmd_buffer->batch, cmd_buffer->device->info, + cmd_buffer->state.current_pipeline, ANV_PIPE_CS_STALL_BIT | ANV_PIPE_STALL_AT_SCOREBOARD_BIT); anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), pc) { @@ -6274,6 +6287,7 @@ cmd_buffer_trace_rays(struct anv_cmd_buffer *cmd_buffer, cmd_buffer->queue_family->engine_class == INTEL_ENGINE_CLASS_COMPUTE) { genx_batch_emit_pipe_control(&cmd_buffer->batch, cmd_buffer->device->info, + cmd_buffer->state.current_pipeline, ANV_PIPE_CS_STALL_BIT | ANV_PIPE_STATE_CACHE_INVALIDATE_BIT | ANV_PIPE_CONSTANT_CACHE_INVALIDATE_BIT | @@ -6961,6 +6975,7 @@ cmd_buffer_emit_depth_stencil(struct anv_cmd_buffer *cmd_buffer) if (intel_needs_workaround(cmd_buffer->device->info, 14016712196)) { genx_batch_emit_pipe_control(&cmd_buffer->batch, cmd_buffer->device->info, + cmd_buffer->state.current_pipeline, ANV_PIPE_DEPTH_CACHE_FLUSH_BIT); } @@ -6983,7 +6998,8 @@ cmd_buffer_emit_depth_stencil(struct anv_cmd_buffer *cmd_buffer) * This also seems sufficient to handle Wa_14014097488. */ genx_batch_emit_pipe_control_write - (&cmd_buffer->batch, cmd_buffer->device->info, WriteImmediateData, + (&cmd_buffer->batch, cmd_buffer->device->info, + cmd_buffer->state.current_pipeline, WriteImmediateData, cmd_buffer->device->workaround_address, 0, 0); } } @@ -7031,6 +7047,7 @@ cmd_buffer_emit_cps_control_buffer(struct anv_cmd_buffer *cmd_buffer, if (intel_needs_workaround(cmd_buffer->device->info, 14016712196)) { genx_batch_emit_pipe_control(&cmd_buffer->batch, cmd_buffer->device->info, + cmd_buffer->state.current_pipeline, ANV_PIPE_DEPTH_CACHE_FLUSH_BIT); } #endif /* GFX_VERx10 >= 125 */ @@ -7896,7 +7913,8 @@ void genX(CmdSetEvent2)( } genx_batch_emit_pipe_control_write - (&cmd_buffer->batch, cmd_buffer->device->info, WriteImmediateData, + (&cmd_buffer->batch, cmd_buffer->device->info, + cmd_buffer->state.current_pipeline, WriteImmediateData, anv_state_pool_state_address(&cmd_buffer->device->dynamic_state_pool, event->state), VK_EVENT_SET, pc_bits); @@ -7931,7 +7949,8 @@ void genX(CmdResetEvent2)( } genx_batch_emit_pipe_control_write - (&cmd_buffer->batch, cmd_buffer->device->info, WriteImmediateData, + (&cmd_buffer->batch, cmd_buffer->device->info, + cmd_buffer->state.current_pipeline, WriteImmediateData, anv_state_pool_state_address(&cmd_buffer->device->dynamic_state_pool, event->state), VK_EVENT_RESET, @@ -8093,7 +8112,7 @@ void genX(cmd_emit_timestamp)(struct anv_batch *batch, fd.Address = addr; } } else { - genx_batch_emit_pipe_control_write(batch, device->info, + genx_batch_emit_pipe_control_write(batch, device->info, 0, WriteTimestamp, addr, 0, 0); } break; @@ -8101,7 +8120,7 @@ void genX(cmd_emit_timestamp)(struct anv_batch *batch, case ANV_TIMESTAMP_CAPTURE_AT_CS_STALL: genx_batch_emit_pipe_control_write - (batch, device->info, WriteTimestamp, addr, 0, + (batch, device->info, 0, WriteTimestamp, addr, 0, ANV_PIPE_CS_STALL_BIT); break; @@ -8149,7 +8168,7 @@ genX(batch_emit_dummy_post_sync_op)(struct anv_batch *batch, primitive_topology == _3DPRIM_LINESTRIP_CONT_BF) && (vertex_count == 1 || vertex_count == 2)) { genx_batch_emit_pipe_control_write - (batch, device->info, WriteImmediateData, + (batch, device->info, 0, WriteImmediateData, device->workaround_address, 0, 0); } diff --git a/src/intel/vulkan/genX_gfx_state.c b/src/intel/vulkan/genX_gfx_state.c index 7d3c63705e9..cf760571d41 100644 --- a/src/intel/vulkan/genX_gfx_state.c +++ b/src/intel/vulkan/genX_gfx_state.c @@ -1232,6 +1232,7 @@ genX(cmd_buffer_flush_gfx_hw_state)(struct anv_cmd_buffer *cmd_buffer) * On DG2+ also known as Wa_1509820217. */ genx_batch_emit_pipe_control(&cmd_buffer->batch, device->info, + cmd_buffer->state.current_pipeline, ANV_PIPE_CS_STALL_BIT); #endif } @@ -1577,6 +1578,7 @@ genX(cmd_buffer_flush_gfx_hw_state)(struct anv_cmd_buffer *cmd_buffer) */ genx_batch_emit_pipe_control(&cmd_buffer->batch, cmd_buffer->device->info, + cmd_buffer->state.current_pipeline, ANV_PIPE_CS_STALL_BIT); #endif } @@ -1702,6 +1704,7 @@ genX(cmd_buffer_flush_gfx_hw_state)(struct anv_cmd_buffer *cmd_buffer) #if GFX_VERx10 >= 125 if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_WA_18019816803)) { genx_batch_emit_pipe_control(&cmd_buffer->batch, cmd_buffer->device->info, + cmd_buffer->state.current_pipeline, ANV_PIPE_PSS_STALL_SYNC_BIT); } #endif @@ -1736,6 +1739,7 @@ genX(cmd_buffer_enable_pma_fix)(struct anv_cmd_buffer *cmd_buffer, bool enable) */ genx_batch_emit_pipe_control (&cmd_buffer->batch, cmd_buffer->device->info, + cmd_buffer->state.current_pipeline, ANV_PIPE_DEPTH_CACHE_FLUSH_BIT | ANV_PIPE_CS_STALL_BIT | #if GFX_VER >= 12 @@ -1764,6 +1768,7 @@ genX(cmd_buffer_enable_pma_fix)(struct anv_cmd_buffer *cmd_buffer, bool enable) */ genx_batch_emit_pipe_control (&cmd_buffer->batch, cmd_buffer->device->info, + cmd_buffer->state.current_pipeline, ANV_PIPE_DEPTH_STALL_BIT | ANV_PIPE_DEPTH_CACHE_FLUSH_BIT | #if GFX_VER >= 12 diff --git a/src/intel/vulkan/genX_gpu_memcpy.c b/src/intel/vulkan/genX_gpu_memcpy.c index 112f94fd2c0..2f4e3332f17 100644 --- a/src/intel/vulkan/genX_gpu_memcpy.c +++ b/src/intel/vulkan/genX_gpu_memcpy.c @@ -83,7 +83,7 @@ emit_common_so_memcpy(struct anv_batch *batch, struct anv_device *device, /* Wa_16013994831 - Disable preemption during streamout. */ if (intel_needs_workaround(device->info, 16013994831)) - genX(batch_set_preemption)(batch, device->info, false); + genX(batch_set_preemption)(batch, device->info, _3D, false); #endif anv_batch_emit(batch, GENX(3DSTATE_SBE), sbe) { @@ -174,7 +174,7 @@ emit_so_memcpy(struct anv_batch *batch, struct anv_device *device, * state is not combined with other state changes. */ if (intel_needs_workaround(device->info, 16011411144)) - genx_batch_emit_pipe_control(batch, device->info, ANV_PIPE_CS_STALL_BIT); + genx_batch_emit_pipe_control(batch, device->info, _3D, ANV_PIPE_CS_STALL_BIT); anv_batch_emit(batch, GENX(3DSTATE_SO_BUFFER), sob) { #if GFX_VER < 12 @@ -200,7 +200,7 @@ emit_so_memcpy(struct anv_batch *batch, struct anv_device *device, /* Wa_16011411144: also CS_STALL after touching SO_BUFFER change */ if (intel_needs_workaround(device->info, 16011411144)) - genx_batch_emit_pipe_control(batch, device->info, ANV_PIPE_CS_STALL_BIT); + genx_batch_emit_pipe_control(batch, device->info, _3D, ANV_PIPE_CS_STALL_BIT); dw = anv_batch_emitn(batch, 5, GENX(3DSTATE_SO_DECL_LIST), .StreamtoBufferSelects0 = (1 << 0), @@ -216,7 +216,7 @@ emit_so_memcpy(struct anv_batch *batch, struct anv_device *device, #if GFX_VERx10 == 125 /* Wa_14015946265: Send PC with CS stall after SO_DECL. */ - genx_batch_emit_pipe_control(batch, device->info, ANV_PIPE_CS_STALL_BIT); + genx_batch_emit_pipe_control(batch, device->info, _3D, ANV_PIPE_CS_STALL_BIT); #endif anv_batch_emit(batch, GENX(3DSTATE_STREAMOUT), so) { @@ -273,7 +273,7 @@ void genX(emit_so_memcpy_end)(struct anv_memcpy_state *state) { if (intel_device_info_is_dg2(state->device->info)) - genX(batch_set_preemption)(state->batch, state->device->info, true); + genX(batch_set_preemption)(state->batch, state->device->info, _3D, true); anv_batch_emit(state->batch, GENX(MI_BATCH_BUFFER_END), end); diff --git a/src/intel/vulkan/genX_init_state.c b/src/intel/vulkan/genX_init_state.c index c0ca960059c..7d032566b3d 100644 --- a/src/intel/vulkan/genX_init_state.c +++ b/src/intel/vulkan/genX_init_state.c @@ -210,6 +210,7 @@ init_common_queue_state(struct anv_queue *queue, struct anv_batch *batch) * both the cases set Render Target Cache Flush Enable". */ genx_batch_emit_pipe_control(batch, device->info, + 0, ANV_PIPE_CS_STALL_BIT | ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT); #endif @@ -573,7 +574,7 @@ init_render_queue_state(struct anv_queue *queue, bool is_companion_rcs_batch) anv_batch_emit(&batch, GENX(STATE_COMPUTE_MODE), zero); anv_batch_emit(&batch, GENX(3DSTATE_MESH_CONTROL), zero); anv_batch_emit(&batch, GENX(3DSTATE_TASK_CONTROL), zero); - genx_batch_emit_pipe_control_write(&batch, device->info, NoWrite, + genx_batch_emit_pipe_control_write(&batch, device->info, _3D, NoWrite, ANV_NULL_ADDRESS, 0, ANV_PIPE_FLUSH_BITS | ANV_PIPE_INVALIDATE_BITS); @@ -582,7 +583,7 @@ init_render_queue_state(struct anv_queue *queue, bool is_companion_rcs_batch) cfe.MaximumNumberofThreads = devinfo->max_cs_threads * devinfo->subslice_total; } - genx_batch_emit_pipe_control_write(&batch, device->info, NoWrite, + genx_batch_emit_pipe_control_write(&batch, device->info, _3D, NoWrite, ANV_NULL_ADDRESS, 0, ANV_PIPE_FLUSH_BITS | ANV_PIPE_INVALIDATE_BITS); @@ -633,7 +634,7 @@ init_compute_queue_state(struct anv_queue *queue) */ if (intel_needs_workaround(devinfo, 14015782607) && queue->family->engine_class == INTEL_ENGINE_CLASS_COMPUTE) { - genx_batch_emit_pipe_control(&batch, devinfo, + genx_batch_emit_pipe_control(&batch, devinfo, GPGPU, ANV_PIPE_CS_STALL_BIT | ANV_PIPE_UNTYPED_DATAPORT_CACHE_FLUSH_BIT | ANV_PIPE_HDC_PIPELINE_FLUSH_BIT); @@ -646,7 +647,7 @@ init_compute_queue_state(struct anv_queue *queue) if (intel_device_info_is_atsm(devinfo) && queue->family->engine_class == INTEL_ENGINE_CLASS_COMPUTE) { genx_batch_emit_pipe_control - (&batch, devinfo, + (&batch, devinfo, GPGPU, ANV_PIPE_CS_STALL_BIT | ANV_PIPE_STATE_CACHE_INVALIDATE_BIT | ANV_PIPE_CONSTANT_CACHE_INVALIDATE_BIT | @@ -1181,6 +1182,7 @@ genX(apply_task_urb_workaround)(struct anv_cmd_buffer *cmd_buffer) /* Issue 'nullprim' to commit the state. */ genx_batch_emit_pipe_control_write (&cmd_buffer->batch, cmd_buffer->device->info, + cmd_buffer->state.current_pipeline, WriteImmediateData, cmd_buffer->device->workaround_address, 0, 0); #endif } diff --git a/src/intel/vulkan/genX_query.c b/src/intel/vulkan/genX_query.c index f42298f85ab..cb99830a7dd 100644 --- a/src/intel/vulkan/genX_query.c +++ b/src/intel/vulkan/genX_query.c @@ -673,7 +673,8 @@ emit_ps_depth_count(struct anv_cmd_buffer *cmd_buffer, bool cs_stall_needed = (GFX_VER == 9 && cmd_buffer->device->info->gt == 4); genx_batch_emit_pipe_control_write - (&cmd_buffer->batch, cmd_buffer->device->info, WritePSDepthCount, addr, 0, + (&cmd_buffer->batch, cmd_buffer->device->info, + cmd_buffer->state.current_pipeline, WritePSDepthCount, addr, 0, ANV_PIPE_DEPTH_STALL_BIT | (cs_stall_needed ? ANV_PIPE_CS_STALL_BIT : 0)); } @@ -694,7 +695,8 @@ emit_query_pc_availability(struct anv_cmd_buffer *cmd_buffer, genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer); genx_batch_emit_pipe_control_write - (&cmd_buffer->batch, cmd_buffer->device->info, WriteImmediateData, addr, + (&cmd_buffer->batch, cmd_buffer->device->info, + cmd_buffer->state.current_pipeline, WriteImmediateData, addr, available, ANV_PIPE_CS_STALL_BIT); } @@ -1023,6 +1025,7 @@ void genX(CmdBeginQueryIndexedEXT)( case VK_QUERY_TYPE_PRIMITIVES_GENERATED_EXT: genx_batch_emit_pipe_control(&cmd_buffer->batch, cmd_buffer->device->info, + cmd_buffer->state.current_pipeline, ANV_PIPE_CS_STALL_BIT | ANV_PIPE_STALL_AT_SCOREBOARD_BIT); mi_store(&b, mi_mem64(anv_address_add(query_addr, 8)), @@ -1033,6 +1036,7 @@ void genX(CmdBeginQueryIndexedEXT)( /* TODO: This might only be necessary for certain stats */ genx_batch_emit_pipe_control(&cmd_buffer->batch, cmd_buffer->device->info, + cmd_buffer->state.current_pipeline, ANV_PIPE_CS_STALL_BIT | ANV_PIPE_STALL_AT_SCOREBOARD_BIT); @@ -1049,6 +1053,7 @@ void genX(CmdBeginQueryIndexedEXT)( case VK_QUERY_TYPE_TRANSFORM_FEEDBACK_STREAM_EXT: genx_batch_emit_pipe_control(&cmd_buffer->batch, cmd_buffer->device->info, + cmd_buffer->state.current_pipeline, ANV_PIPE_CS_STALL_BIT | ANV_PIPE_STALL_AT_SCOREBOARD_BIT); emit_xfb_query(&b, index, anv_address_add(query_addr, 8)); @@ -1108,6 +1113,7 @@ void genX(CmdBeginQueryIndexedEXT)( genx_batch_emit_pipe_control(&cmd_buffer->batch, cmd_buffer->device->info, + cmd_buffer->state.current_pipeline, ANV_PIPE_CS_STALL_BIT | ANV_PIPE_STALL_AT_SCOREBOARD_BIT); cmd_buffer->perf_query_pool = pool; @@ -1170,6 +1176,7 @@ void genX(CmdBeginQueryIndexedEXT)( case VK_QUERY_TYPE_PERFORMANCE_QUERY_INTEL: { genx_batch_emit_pipe_control(&cmd_buffer->batch, cmd_buffer->device->info, + cmd_buffer->state.current_pipeline, ANV_PIPE_CS_STALL_BIT | ANV_PIPE_STALL_AT_SCOREBOARD_BIT); emit_perf_intel_query(cmd_buffer, pool, &b, query_addr, false); @@ -1210,6 +1217,7 @@ void genX(CmdEndQueryIndexedEXT)( */ genx_batch_emit_pipe_control(&cmd_buffer->batch, cmd_buffer->device->info, + cmd_buffer->state.current_pipeline, ANV_PIPE_CS_STALL_BIT | ANV_PIPE_STALL_AT_SCOREBOARD_BIT); @@ -1222,6 +1230,7 @@ void genX(CmdEndQueryIndexedEXT)( /* TODO: This might only be necessary for certain stats */ genx_batch_emit_pipe_control(&cmd_buffer->batch, cmd_buffer->device->info, + cmd_buffer->state.current_pipeline, ANV_PIPE_CS_STALL_BIT | ANV_PIPE_STALL_AT_SCOREBOARD_BIT); @@ -1240,6 +1249,7 @@ void genX(CmdEndQueryIndexedEXT)( case VK_QUERY_TYPE_TRANSFORM_FEEDBACK_STREAM_EXT: genx_batch_emit_pipe_control(&cmd_buffer->batch, cmd_buffer->device->info, + cmd_buffer->state.current_pipeline, ANV_PIPE_CS_STALL_BIT | ANV_PIPE_STALL_AT_SCOREBOARD_BIT); emit_xfb_query(&b, index, anv_address_add(query_addr, 16)); @@ -1249,6 +1259,7 @@ void genX(CmdEndQueryIndexedEXT)( case VK_QUERY_TYPE_PERFORMANCE_QUERY_KHR: { genx_batch_emit_pipe_control(&cmd_buffer->batch, cmd_buffer->device->info, + cmd_buffer->state.current_pipeline, ANV_PIPE_CS_STALL_BIT | ANV_PIPE_STALL_AT_SCOREBOARD_BIT); cmd_buffer->perf_query_pool = pool; @@ -1327,6 +1338,7 @@ void genX(CmdEndQueryIndexedEXT)( case VK_QUERY_TYPE_PERFORMANCE_QUERY_INTEL: { genx_batch_emit_pipe_control(&cmd_buffer->batch, cmd_buffer->device->info, + cmd_buffer->state.current_pipeline, ANV_PIPE_CS_STALL_BIT | ANV_PIPE_STALL_AT_SCOREBOARD_BIT); uint32_t marker_offset = intel_perf_marker_offset(); @@ -1392,7 +1404,8 @@ void genX(CmdWriteTimestamp2)( bool cs_stall_needed = (GFX_VER == 9 && cmd_buffer->device->info->gt == 4); genx_batch_emit_pipe_control_write - (&cmd_buffer->batch, cmd_buffer->device->info, WriteTimestamp, + (&cmd_buffer->batch, cmd_buffer->device->info, + cmd_buffer->state.current_pipeline, WriteTimestamp, anv_address_add(query_addr, 8), 0, cs_stall_needed ? ANV_PIPE_CS_STALL_BIT : 0);