mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-23 08:38:08 +02:00
panvk: Request resources during subqueue init
REQ_RES is quite an expensive operation, so issuing it before and after each RUN command slows down RUN-command-heavy workloads. This commit moves the REQ_RES calls to subqueue initialization, so each subqueue requests its resources once.

Reviewed-by: John Anthony <john.anthony@arm.com>
Reviewed-by: Boris Brezillon <boris.brezillon@collabora.com>
Tested-by: Heiko Stuebner <heiko@sntech.de>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/33512>
This commit is contained in:
parent
e12ddbfd78
commit
626e9e4179
5 changed files with 17 additions and 16 deletions
|
|
@ -315,7 +315,6 @@ cmd_dispatch(struct panvk_cmd_buffer *cmdbuf, struct panvk_dispatch_info *info)
|
|||
|
||||
panvk_per_arch(cs_pick_iter_sb)(cmdbuf, PANVK_SUBQUEUE_COMPUTE);
|
||||
|
||||
cs_req_res(b, CS_COMPUTE_RES);
|
||||
if (indirect) {
|
||||
/* Use run_compute with a set task axis instead of run_compute_indirect as
|
||||
* run_compute_indirect has been found to cause intermittent hangs. This
|
||||
|
|
@ -337,7 +336,6 @@ cmd_dispatch(struct panvk_cmd_buffer *cmdbuf, struct panvk_dispatch_info *info)
|
|||
task_increment, task_axis,
|
||||
cs_shader_res_sel(0, 0, 0, 0));
|
||||
}
|
||||
cs_req_res(b, 0);
|
||||
|
||||
struct cs_index sync_addr = cs_scratch_reg64(b, 0);
|
||||
struct cs_index iter_sb = cs_scratch_reg32(b, 2);
|
||||
|
|
|
|||
|
|
@ -2074,7 +2074,6 @@ panvk_cmd_draw(struct panvk_cmd_buffer *cmdbuf, struct panvk_draw_info *draw)
|
|||
uint32_t idvs_count = DIV_ROUND_UP(cmdbuf->state.gfx.render.layer_count,
|
||||
MAX_LAYERS_PER_TILER_DESC);
|
||||
|
||||
cs_req_res(b, CS_IDVS_RES);
|
||||
if (idvs_count > 1) {
|
||||
struct cs_index counter_reg = cs_scratch_reg32(b, 17);
|
||||
struct cs_index tiler_ctx_addr = cs_sr_reg64(b, IDVS, TILER_CTX);
|
||||
|
|
@ -2116,7 +2115,6 @@ panvk_cmd_draw(struct panvk_cmd_buffer *cmdbuf, struct panvk_draw_info *draw)
|
|||
cs_shader_res_sel(2, 2, 2, 0), cs_undef());
|
||||
#endif
|
||||
}
|
||||
cs_req_res(b, 0);
|
||||
}
|
||||
|
||||
VkResult
|
||||
|
|
@ -2276,8 +2274,6 @@ panvk_cmd_draw_indirect(struct panvk_cmd_buffer *cmdbuf,
|
|||
cs_move64_to(b, draw_params_addr, draw->indirect.buffer_dev_addr);
|
||||
cs_move32_to(b, draw_id, 0);
|
||||
|
||||
cs_req_res(b, CS_IDVS_RES);
|
||||
|
||||
cs_while(b, MALI_CS_CONDITION_GREATER, draw_count) {
|
||||
cs_update_vt_ctx(b) {
|
||||
cs_move32_to(b, cs_sr_reg32(b, IDVS, GLOBAL_ATTRIBUTE_OFFSET), 0);
|
||||
|
|
@ -2400,8 +2396,6 @@ panvk_cmd_draw_indirect(struct panvk_cmd_buffer *cmdbuf,
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
cs_req_res(b, 0);
|
||||
}
|
||||
|
||||
VKAPI_ATTR void VKAPI_CALL
|
||||
|
|
@ -2625,9 +2619,7 @@ flush_tiling(struct panvk_cmd_buffer *cmdbuf)
|
|||
|
||||
if (cmdbuf->state.gfx.render.tiler || inherits_render_ctx(cmdbuf)) {
|
||||
/* Flush the tiling operations and signal the internal sync object. */
|
||||
cs_req_res(b, CS_TILER_RES);
|
||||
cs_finish_tiling(b);
|
||||
cs_req_res(b, 0);
|
||||
|
||||
struct cs_index sync_addr = cs_scratch_reg64(b, 0);
|
||||
struct cs_index iter_sb = cs_scratch_reg32(b, 2);
|
||||
|
|
@ -2856,7 +2848,6 @@ issue_fragment_jobs(struct panvk_cmd_buffer *cmdbuf)
|
|||
cs_wait_slot(b, SB_ID(IMM_FLUSH));
|
||||
}
|
||||
|
||||
cs_req_res(b, CS_FRAG_RES);
|
||||
if (cmdbuf->state.gfx.render.layer_count > 1) {
|
||||
struct cs_index layer_count = cs_reg32(b, 47);
|
||||
|
||||
|
|
@ -2874,7 +2865,6 @@ issue_fragment_jobs(struct panvk_cmd_buffer *cmdbuf)
|
|||
cs_trace_run_fragment(b, tracing_ctx, cs_scratch_reg_tuple(b, 0, 4),
|
||||
false, MALI_TILE_RENDER_ORDER_Z_ORDER);
|
||||
}
|
||||
cs_req_res(b, 0);
|
||||
|
||||
struct cs_index sync_addr = cs_scratch_reg64(b, 0);
|
||||
struct cs_index iter_sb = cs_scratch_reg32(b, 2);
|
||||
|
|
|
|||
|
|
@ -140,7 +140,6 @@ panvk_per_arch(dispatch_precomp)(struct panvk_precomp_ctx *ctx,
|
|||
|
||||
panvk_per_arch(cs_pick_iter_sb)(cmdbuf, PANVK_SUBQUEUE_COMPUTE);
|
||||
|
||||
cs_req_res(b, CS_COMPUTE_RES);
|
||||
unsigned task_axis = MALI_TASK_AXIS_X;
|
||||
unsigned task_increment = 0;
|
||||
panvk_per_arch(calculate_task_axis_and_increment)(
|
||||
|
|
@ -148,7 +147,6 @@ panvk_per_arch(dispatch_precomp)(struct panvk_precomp_ctx *ctx,
|
|||
cs_trace_run_compute(b, tracing_ctx, cs_scratch_reg_tuple(b, 0, 4),
|
||||
task_increment, task_axis,
|
||||
cs_shader_res_sel(0, 0, 0, 0));
|
||||
cs_req_res(b, 0);
|
||||
|
||||
struct cs_index sync_addr = cs_scratch_reg64(b, 0);
|
||||
struct cs_index iter_sb = cs_scratch_reg32(b, 2);
|
||||
|
|
|
|||
|
|
@ -89,14 +89,12 @@ generate_tiler_oom_handler(struct panvk_device *dev,
|
|||
TILER_OOM_CTX_FIELD_OFFSET(layer_count));
|
||||
cs_wait_slot(&b, SB_ID(LS));
|
||||
|
||||
cs_req_res(&b, CS_FRAG_RES);
|
||||
cs_while(&b, MALI_CS_CONDITION_GREATER, layer_count) {
|
||||
cs_trace_run_fragment(&b, &tracing_ctx, cs_scratch_reg_tuple(&b, 8, 4),
|
||||
false, MALI_TILE_RENDER_ORDER_Z_ORDER);
|
||||
cs_add32(&b, layer_count, layer_count, -1);
|
||||
cs_add64(&b, fbd_ptr, fbd_ptr, fbd_size);
|
||||
}
|
||||
cs_req_res(&b, 0);
|
||||
/* Wait for all iter scoreboards for simplicity. */
|
||||
cs_wait_slots(&b, SB_ALL_ITERS_MASK);
|
||||
|
||||
|
|
|
|||
|
|
@ -446,6 +446,23 @@ init_subqueue(struct panvk_queue *queue, enum panvk_subqueue_id subqueue)
|
|||
cs_heap_set(&b, heap_ctx_addr);
|
||||
}
|
||||
|
||||
/* Request resources for each subqueue during initialization, as the req_res
|
||||
* is an expensive operation which should be called sparingly. */
|
||||
switch (subqueue) {
|
||||
case PANVK_SUBQUEUE_VERTEX_TILER:
|
||||
cs_req_res(&b, CS_IDVS_RES | CS_TILER_RES);
|
||||
break;
|
||||
case PANVK_SUBQUEUE_FRAGMENT:
|
||||
cs_req_res(&b, CS_FRAG_RES);
|
||||
break;
|
||||
case PANVK_SUBQUEUE_COMPUTE:
|
||||
cs_req_res(&b, CS_COMPUTE_RES);
|
||||
break;
|
||||
default:
|
||||
unreachable("Unknown subqueue");
|
||||
break;
|
||||
}
|
||||
|
||||
cs_finish(&b);
|
||||
|
||||
assert(cs_is_valid(&b));
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue