panvk: Request resources during subqueue init

REQ_RES is quite an expensive operation, so calling it before and after
each RUN command slows down RUN-command-heavy workloads.

This commit moves the REQ_RES calls to init_subqueue(), so that each
subqueue requests its resources once, at initialization.

Reviewed-by: John Anthony <john.anthony@arm.com>
Reviewed-by: Boris Brezillon <boris.brezillon@collabora.com>
Tested-by: Heiko Stuebner <heiko@sntech.de>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/33512>
This commit is contained in:
Lars-Ivar Hesselberg Simonsen 2025-02-12 13:06:39 +01:00 committed by Marge Bot
parent e12ddbfd78
commit 626e9e4179
5 changed files with 17 additions and 16 deletions

View file

@ -315,7 +315,6 @@ cmd_dispatch(struct panvk_cmd_buffer *cmdbuf, struct panvk_dispatch_info *info)
panvk_per_arch(cs_pick_iter_sb)(cmdbuf, PANVK_SUBQUEUE_COMPUTE);
cs_req_res(b, CS_COMPUTE_RES);
if (indirect) {
/* Use run_compute with a set task axis instead of run_compute_indirect as
* run_compute_indirect has been found to cause intermittent hangs. This
@ -337,7 +336,6 @@ cmd_dispatch(struct panvk_cmd_buffer *cmdbuf, struct panvk_dispatch_info *info)
task_increment, task_axis,
cs_shader_res_sel(0, 0, 0, 0));
}
cs_req_res(b, 0);
struct cs_index sync_addr = cs_scratch_reg64(b, 0);
struct cs_index iter_sb = cs_scratch_reg32(b, 2);

View file

@ -2074,7 +2074,6 @@ panvk_cmd_draw(struct panvk_cmd_buffer *cmdbuf, struct panvk_draw_info *draw)
uint32_t idvs_count = DIV_ROUND_UP(cmdbuf->state.gfx.render.layer_count,
MAX_LAYERS_PER_TILER_DESC);
cs_req_res(b, CS_IDVS_RES);
if (idvs_count > 1) {
struct cs_index counter_reg = cs_scratch_reg32(b, 17);
struct cs_index tiler_ctx_addr = cs_sr_reg64(b, IDVS, TILER_CTX);
@ -2116,7 +2115,6 @@ panvk_cmd_draw(struct panvk_cmd_buffer *cmdbuf, struct panvk_draw_info *draw)
cs_shader_res_sel(2, 2, 2, 0), cs_undef());
#endif
}
cs_req_res(b, 0);
}
VkResult
@ -2276,8 +2274,6 @@ panvk_cmd_draw_indirect(struct panvk_cmd_buffer *cmdbuf,
cs_move64_to(b, draw_params_addr, draw->indirect.buffer_dev_addr);
cs_move32_to(b, draw_id, 0);
cs_req_res(b, CS_IDVS_RES);
cs_while(b, MALI_CS_CONDITION_GREATER, draw_count) {
cs_update_vt_ctx(b) {
cs_move32_to(b, cs_sr_reg32(b, IDVS, GLOBAL_ATTRIBUTE_OFFSET), 0);
@ -2400,8 +2396,6 @@ panvk_cmd_draw_indirect(struct panvk_cmd_buffer *cmdbuf,
}
}
}
cs_req_res(b, 0);
}
VKAPI_ATTR void VKAPI_CALL
@ -2625,9 +2619,7 @@ flush_tiling(struct panvk_cmd_buffer *cmdbuf)
if (cmdbuf->state.gfx.render.tiler || inherits_render_ctx(cmdbuf)) {
/* Flush the tiling operations and signal the internal sync object. */
cs_req_res(b, CS_TILER_RES);
cs_finish_tiling(b);
cs_req_res(b, 0);
struct cs_index sync_addr = cs_scratch_reg64(b, 0);
struct cs_index iter_sb = cs_scratch_reg32(b, 2);
@ -2856,7 +2848,6 @@ issue_fragment_jobs(struct panvk_cmd_buffer *cmdbuf)
cs_wait_slot(b, SB_ID(IMM_FLUSH));
}
cs_req_res(b, CS_FRAG_RES);
if (cmdbuf->state.gfx.render.layer_count > 1) {
struct cs_index layer_count = cs_reg32(b, 47);
@ -2874,7 +2865,6 @@ issue_fragment_jobs(struct panvk_cmd_buffer *cmdbuf)
cs_trace_run_fragment(b, tracing_ctx, cs_scratch_reg_tuple(b, 0, 4),
false, MALI_TILE_RENDER_ORDER_Z_ORDER);
}
cs_req_res(b, 0);
struct cs_index sync_addr = cs_scratch_reg64(b, 0);
struct cs_index iter_sb = cs_scratch_reg32(b, 2);

View file

@ -140,7 +140,6 @@ panvk_per_arch(dispatch_precomp)(struct panvk_precomp_ctx *ctx,
panvk_per_arch(cs_pick_iter_sb)(cmdbuf, PANVK_SUBQUEUE_COMPUTE);
cs_req_res(b, CS_COMPUTE_RES);
unsigned task_axis = MALI_TASK_AXIS_X;
unsigned task_increment = 0;
panvk_per_arch(calculate_task_axis_and_increment)(
@ -148,7 +147,6 @@ panvk_per_arch(dispatch_precomp)(struct panvk_precomp_ctx *ctx,
cs_trace_run_compute(b, tracing_ctx, cs_scratch_reg_tuple(b, 0, 4),
task_increment, task_axis,
cs_shader_res_sel(0, 0, 0, 0));
cs_req_res(b, 0);
struct cs_index sync_addr = cs_scratch_reg64(b, 0);
struct cs_index iter_sb = cs_scratch_reg32(b, 2);

View file

@ -89,14 +89,12 @@ generate_tiler_oom_handler(struct panvk_device *dev,
TILER_OOM_CTX_FIELD_OFFSET(layer_count));
cs_wait_slot(&b, SB_ID(LS));
cs_req_res(&b, CS_FRAG_RES);
cs_while(&b, MALI_CS_CONDITION_GREATER, layer_count) {
cs_trace_run_fragment(&b, &tracing_ctx, cs_scratch_reg_tuple(&b, 8, 4),
false, MALI_TILE_RENDER_ORDER_Z_ORDER);
cs_add32(&b, layer_count, layer_count, -1);
cs_add64(&b, fbd_ptr, fbd_ptr, fbd_size);
}
cs_req_res(&b, 0);
/* Wait for all iter scoreboards for simplicity. */
cs_wait_slots(&b, SB_ALL_ITERS_MASK);

View file

@ -446,6 +446,23 @@ init_subqueue(struct panvk_queue *queue, enum panvk_subqueue_id subqueue)
cs_heap_set(&b, heap_ctx_addr);
}
/* Request resources for each subqueue during initialization, as req_res is
* an expensive operation that should be issued sparingly. */
switch (subqueue) {
case PANVK_SUBQUEUE_VERTEX_TILER:
cs_req_res(&b, CS_IDVS_RES | CS_TILER_RES);
break;
case PANVK_SUBQUEUE_FRAGMENT:
cs_req_res(&b, CS_FRAG_RES);
break;
case PANVK_SUBQUEUE_COMPUTE:
cs_req_res(&b, CS_COMPUTE_RES);
break;
default:
unreachable("Unknown subqueue");
break;
}
cs_finish(&b);
assert(cs_is_valid(&b));