mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-04-23 05:10:36 +02:00
hk: merge adjacent CDM control streams
This reduces submission overhead in the kernel/firmware. Signed-off-by: Alyssa Rosenzweig <alyssa@rosenzweig.io> Reviewed-by: Mary Guillemard <mary.guillemard@collabora.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/35662>
This commit is contained in:
parent
f7db4afd09
commit
43a47266c8
5 changed files with 73 additions and 5 deletions
|
|
@ -75,6 +75,7 @@ static const struct debug_named_value agx_debug_options[] = {
|
|||
{"scratch", AGX_DBG_SCRATCH, "Debug scratch memory usage"},
|
||||
{"1queue", AGX_DBG_1QUEUE, "Force usage of a single queue for multiple contexts"},
|
||||
{"nosoft", AGX_DBG_NOSOFT, "Disable soft fault optimizations"},
|
||||
{"nomerge", AGX_DBG_NOMERGE, "Disable control stream merging"},
|
||||
{"bodumpverbose", AGX_DBG_BODUMPVERBOSE, "Include extra info with dumps"},
|
||||
DEBUG_NAMED_VALUE_END
|
||||
};
|
||||
|
|
|
|||
|
|
@ -47,6 +47,7 @@ enum agx_dbg {
|
|||
AGX_DBG_NOSOFT = BITFIELD_BIT(19),
|
||||
AGX_DBG_FEEDBACK = BITFIELD_BIT(20),
|
||||
AGX_DBG_1QUEUE = BITFIELD_BIT(21),
|
||||
AGX_DBG_NOMERGE = BITFIELD_BIT(22),
|
||||
};
|
||||
|
||||
/* How many power-of-two levels in the BO cache do we want? 2^14 minimum chosen
|
||||
|
|
|
|||
|
|
@ -259,6 +259,32 @@ hk_BeginCommandBuffer(VkCommandBuffer commandBuffer,
|
|||
return VK_SUCCESS;
|
||||
}
|
||||
|
||||
/*
|
||||
* Merge adjacent compute control streams. Except for reading timestamps, there
|
||||
* is no reason to submit two CDM streams back-to-back in the same command
|
||||
* buffer. However, it is challenging to avoid constructing such sequences due
|
||||
* to the gymnastics required to reorder compute around graphics. Merging at
|
||||
* EndCommandBuffer is cheap O(# of control streams) and lets us get away with
|
||||
* the sloppiness.
|
||||
*/
|
||||
static void
|
||||
merge_control_streams(struct hk_cmd_buffer *cmd)
|
||||
{
|
||||
struct hk_cs *last = NULL;
|
||||
|
||||
list_for_each_entry_safe(struct hk_cs, cs, &cmd->control_streams, node) {
|
||||
if (cs->type == HK_CS_CDM && last && last->type == HK_CS_CDM &&
|
||||
!last->timestamp.end.handle) {
|
||||
|
||||
hk_cs_merge_cdm(last, cs);
|
||||
list_del(&cs->node);
|
||||
hk_cs_destroy(cs);
|
||||
} else {
|
||||
last = cs;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
VKAPI_ATTR VkResult VKAPI_CALL
|
||||
hk_EndCommandBuffer(VkCommandBuffer commandBuffer)
|
||||
{
|
||||
|
|
@ -271,6 +297,21 @@ hk_EndCommandBuffer(VkCommandBuffer commandBuffer)
|
|||
hk_cmd_buffer_end_compute(cmd);
|
||||
hk_cmd_buffer_end_compute_internal(cmd, &cmd->current_cs.post_gfx);
|
||||
|
||||
struct hk_device *dev = hk_cmd_buffer_device(cmd);
|
||||
if (likely(!(dev->dev.debug & AGX_DBG_NOMERGE))) {
|
||||
merge_control_streams(cmd);
|
||||
}
|
||||
|
||||
/* We cannot terminate CDM control streams until after merging, since merging
|
||||
* needs to append stream links late. Now that we've merged, insert all the
|
||||
* missing stream terminates.
|
||||
*/
|
||||
list_for_each_entry(struct hk_cs, cs, &cmd->control_streams, node) {
|
||||
if (cs->type == HK_CS_CDM) {
|
||||
cs->current = agx_cdm_terminate(cs->current);
|
||||
}
|
||||
}
|
||||
|
||||
return vk_command_buffer_get_record_result(&cmd->vk);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -370,7 +370,7 @@ struct hk_cs {
|
|||
|
||||
/* Statistics */
|
||||
struct {
|
||||
uint32_t calls, cmds, flushes;
|
||||
uint32_t calls, cmds, flushes, merged;
|
||||
} stats;
|
||||
|
||||
/* Timestamp writes. Currently just compute end / fragment end. We could
|
||||
|
|
@ -406,6 +406,32 @@ struct hk_cs {
|
|||
uint32_t restart_index;
|
||||
};
|
||||
|
||||
/*
|
||||
* Helper to merge two compute control streams, concatenating the second control
|
||||
* stream to the first one. Must sync with hk_cs.
|
||||
*/
|
||||
static inline void
|
||||
hk_cs_merge_cdm(struct hk_cs *a, const struct hk_cs *b)
|
||||
{
|
||||
assert(a->type == HK_CS_CDM && b->type == HK_CS_CDM);
|
||||
assert(a->cmd == b->cmd);
|
||||
assert(!a->timestamp.end.handle);
|
||||
|
||||
agx_cdm_jump(a->current, b->addr);
|
||||
a->current = b->current;
|
||||
a->stream_linked = true;
|
||||
|
||||
a->scratch.cs.main |= b->scratch.cs.main;
|
||||
a->scratch.cs.preamble |= b->scratch.cs.preamble;
|
||||
|
||||
a->timestamp = b->timestamp;
|
||||
|
||||
a->stats.calls += b->stats.calls;
|
||||
a->stats.cmds += b->stats.cmds;
|
||||
a->stats.flushes += b->stats.flushes;
|
||||
a->stats.merged++;
|
||||
}
|
||||
|
||||
static inline uint64_t
|
||||
hk_cs_current_addr(struct hk_cs *cs)
|
||||
{
|
||||
|
|
@ -660,8 +686,6 @@ hk_cmd_buffer_end_compute_internal(struct hk_cmd_buffer *cmd,
|
|||
if (cs->imm_writes.size) {
|
||||
hk_dispatch_imm_writes(cmd, cs);
|
||||
}
|
||||
|
||||
cs->current = agx_cdm_terminate(cs->current);
|
||||
}
|
||||
|
||||
*ptr = NULL;
|
||||
|
|
|
|||
|
|
@ -838,8 +838,9 @@ queue_submit(struct hk_device *dev, struct hk_queue *queue,
|
|||
if (cs->type == HK_CS_CDM) {
|
||||
perf_debug(
|
||||
cmdbuf,
|
||||
"%u: Submitting CDM with %u API calls, %u dispatches, %u flushes",
|
||||
i, cs->stats.calls, cs->stats.cmds, cs->stats.flushes);
|
||||
"%u: Submitting CDM with %u API calls, %u dispatches, %u flushes, %u merged",
|
||||
i, cs->stats.calls, cs->stats.cmds, cs->stats.flushes,
|
||||
cs->stats.merged);
|
||||
|
||||
assert(cs->stats.cmds > 0 || cs->stats.flushes > 0 ||
|
||||
cs->timestamp.end.handle);
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue