diff --git a/.pick_status.json b/.pick_status.json index c723b3a9c40..d5c461d32db 100644 --- a/.pick_status.json +++ b/.pick_status.json @@ -1244,7 +1244,7 @@ "description": "radeonsi: update si_need_gfx_cs_space upper bound", "nominated": true, "nomination_type": 2, - "resolution": 0, + "resolution": 1, "main_sha": null, "because_sha": "cdecbee9225f98a5529a55964f1f9758c4f893b5", "notes": null diff --git a/src/gallium/drivers/radeonsi/si_compute.c b/src/gallium/drivers/radeonsi/si_compute.c index 83cf1003227..c4a85c24007 100644 --- a/src/gallium/drivers/radeonsi/si_compute.c +++ b/src/gallium/drivers/radeonsi/si_compute.c @@ -1224,7 +1224,7 @@ static void si_launch_grid(struct pipe_context *ctx, const struct pipe_grid_info } } - si_need_gfx_cs_space(sctx, 0); + si_need_gfx_cs_space(sctx, 0, 0); /* If we're using a secure context, determine if cs must be secure or not */ if (unlikely(radeon_uses_secure_bos(sctx->ws))) { diff --git a/src/gallium/drivers/radeonsi/si_cp_dma.c b/src/gallium/drivers/radeonsi/si_cp_dma.c index a398e47bb60..7063a950cb9 100644 --- a/src/gallium/drivers/radeonsi/si_cp_dma.c +++ b/src/gallium/drivers/radeonsi/si_cp_dma.c @@ -107,7 +107,7 @@ static void si_cp_dma_prepare(struct si_context *sctx, struct pipe_resource *dst struct pipe_resource *src, unsigned byte_count, uint64_t remaining_size, bool *is_first, unsigned *packet_flags) { - si_need_gfx_cs_space(sctx, 0); + si_need_gfx_cs_space(sctx, 0, 0); /* This must be done after need_cs_space. */ radeon_add_to_buffer_list(sctx, &sctx->gfx_cs, si_resource(dst), diff --git a/src/gallium/drivers/radeonsi/si_perfcounter.c b/src/gallium/drivers/radeonsi/si_perfcounter.c index 785c98a9dc9..806a842a08d 100644 --- a/src/gallium/drivers/radeonsi/si_perfcounter.c +++ b/src/gallium/drivers/radeonsi/si_perfcounter.c @@ -277,7 +277,7 @@ static void si_pc_query_resume(struct si_context *sctx, struct si_query *squery) if (!si_query_buffer_alloc(sctx, &query->buffer, NULL, query->result_size)) return; - si_need_gfx_cs_space(sctx, 0); + si_need_gfx_cs_space(sctx, 0, 0); if (query->shaders) si_pc_emit_shaders(&sctx->gfx_cs, query->shaders); diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h index 832bd098b86..fdc7814a1fc 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.h +++ b/src/gallium/drivers/radeonsi/si_pipe.h @@ -2005,7 +2005,8 @@ static inline bool util_rast_prim_is_triangles(unsigned prim) return ((1 << prim) & UTIL_ALL_PRIM_TRIANGLE_MODES) != 0; } -static inline void si_need_gfx_cs_space(struct si_context *ctx, unsigned num_draws) +static inline void si_need_gfx_cs_space(struct si_context *ctx, unsigned num_draws, + unsigned extra_dw_per_draw) { struct radeon_cmdbuf *cs = &ctx->gfx_cs; /* Don't count the needed CS space exactly and just use an upper bound. @@ -2013,7 +2014,8 @@ static inline void si_need_gfx_cs_space(struct si_context *ctx, unsigned num_dra * Also reserve space for stopping queries at the end of IB, because * the number of active queries is unlimited in theory. */ - unsigned reserve_dw = 2048 + ctx->num_cs_dw_queries_suspend + num_draws * 10; + unsigned reserve_dw = 2048 + ctx->num_cs_dw_queries_suspend + + num_draws * (10 + extra_dw_per_draw); if (!ctx->ws->cs_check_space(cs, reserve_dw)) si_flush_gfx_cs(ctx, RADEON_FLUSH_ASYNC_START_NEXT_GFX_IB_NOW, NULL); diff --git a/src/gallium/drivers/radeonsi/si_query.c b/src/gallium/drivers/radeonsi/si_query.c index 4a48b72383c..7d194fc9c4a 100644 --- a/src/gallium/drivers/radeonsi/si_query.c +++ b/src/gallium/drivers/radeonsi/si_query.c @@ -919,7 +919,7 @@ static void si_query_hw_emit_start(struct si_context *sctx, struct si_query_hw * si_update_prims_generated_query_state(sctx, query->b.type, 1); si_update_hw_pipeline_stats(sctx, query->b.type, 1); - si_need_gfx_cs_space(sctx, 0); + si_need_gfx_cs_space(sctx, 0, 0); va = query->buffer.buf->gpu_address + query->buffer.results_end; si_query_hw_do_emit_start(sctx, query, query->buffer.buf, va); @@ -1015,7 +1015,7 @@ static void si_query_hw_emit_stop(struct si_context *sctx, struct si_query_hw *q /* The queries which need begin already called this in begin_query. */ if (query->flags & SI_QUERY_HW_FLAG_NO_START) { - si_need_gfx_cs_space(sctx, 0); + si_need_gfx_cs_space(sctx, 0, 0); if (!si_query_buffer_alloc(sctx, &query->buffer, si_query_hw_prepare_buffer, query->result_size)) return; @@ -1728,7 +1728,7 @@ void si_resume_queries(struct si_context *sctx) struct si_query *query; /* Check CS space here. Resuming must not be interrupted by flushes. */ - si_need_gfx_cs_space(sctx, 0); + si_need_gfx_cs_space(sctx, 0, 0); LIST_FOR_EACH_ENTRY (query, &sctx->active_queries, active_list) query->ops->resume(sctx, query); diff --git a/src/gallium/drivers/radeonsi/si_state_draw.cpp b/src/gallium/drivers/radeonsi/si_state_draw.cpp index 356f604094f..eeb72198656 100644 --- a/src/gallium/drivers/radeonsi/si_state_draw.cpp +++ b/src/gallium/drivers/radeonsi/si_state_draw.cpp @@ -2071,7 +2071,7 @@ static void si_draw(struct pipe_context *ctx, else if (GFX_VERSION < GFX12) gfx11_decompress_textures(sctx, u_bit_consecutive(0, SI_NUM_GRAPHICS_SHADERS)); - si_need_gfx_cs_space(sctx, num_draws); + si_need_gfx_cs_space(sctx, num_draws, ALT_HIZ_LOGIC ? 8 : 0); if (u_trace_perfetto_active(&sctx->ds.trace_context)) trace_si_begin_draw(&sctx->trace);