mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-05 07:28:11 +02:00
hk: parallelize after-graphics available sets
Reduces CDM overhead.

Signed-off-by: Alyssa Rosenzweig <alyssa@rosenzweig.io>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/31532>
This commit is contained in:
parent
a2edffad2f
commit
ad8f005ecb
6 changed files with 87 additions and 6 deletions
|
|
@ -109,3 +109,9 @@ libagx_increment_ia_counters(constant struct libagx_increment_ia_counters *p,
|
|||
*(p->vs_invocations) += count;
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
libagx_write_u32s(constant struct libagx_imm_write *p, uint id)
|
||||
{
|
||||
*(p[id].address) = p[id].value;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -67,3 +67,8 @@ struct libagx_increment_ia_counters {
|
|||
uint32_t index_buffer_range_el;
|
||||
uint32_t restart_index;
|
||||
};
|
||||
|
||||
struct libagx_imm_write {
|
||||
GLOBAL(uint32_t) address;
|
||||
uint32_t value;
|
||||
};
|
||||
|
|
|
|||
|
|
@ -269,7 +269,7 @@ hk_EndCommandBuffer(VkCommandBuffer commandBuffer)
|
|||
|
||||
perf_debug(dev, "End command buffer");
|
||||
hk_cmd_buffer_end_compute(cmd);
|
||||
hk_cmd_buffer_end_compute_internal(&cmd->current_cs.post_gfx);
|
||||
hk_cmd_buffer_end_compute_internal(cmd, &cmd->current_cs.post_gfx);
|
||||
|
||||
/* With rasterizer discard, we might end up with empty VDM batches.
|
||||
* It is difficult to avoid creating these empty batches, but it's easy to
|
||||
|
|
|
|||
|
|
@ -325,6 +325,11 @@ struct hk_cs {
|
|||
struct hk_scratch_req fs;
|
||||
} scratch;
|
||||
|
||||
/* Immediate writes, type libagx_imm_write. These all happen in parallel at
|
||||
* the end of the control stream. This accelerates queries. Implies CDM.
|
||||
*/
|
||||
struct util_dynarray imm_writes;
|
||||
|
||||
/* Statistics */
|
||||
struct {
|
||||
uint32_t calls, cmds, flushes;
|
||||
|
|
@ -565,16 +570,29 @@ hk_cs_destroy(struct hk_cs *cs)
|
|||
if (cs->type == HK_CS_VDM) {
|
||||
util_dynarray_fini(&cs->scissor);
|
||||
util_dynarray_fini(&cs->depth_bias);
|
||||
} else {
|
||||
util_dynarray_fini(&cs->imm_writes);
|
||||
}
|
||||
|
||||
free(cs);
|
||||
}
|
||||
|
||||
void hk_dispatch_imm_writes(struct hk_cmd_buffer *cmd, struct hk_cs *cs);
|
||||
|
||||
static void
|
||||
hk_cmd_buffer_end_compute_internal(struct hk_cs **ptr)
|
||||
hk_cmd_buffer_end_compute_internal(struct hk_cmd_buffer *cmd,
|
||||
struct hk_cs **ptr)
|
||||
{
|
||||
if (*ptr) {
|
||||
struct hk_cs *cs = *ptr;
|
||||
|
||||
/* This control stream may write immediates as it ends. Queue the writes
|
||||
* now that we're done emitting everything else.
|
||||
*/
|
||||
if (cs->imm_writes.size) {
|
||||
hk_dispatch_imm_writes(cmd, cs);
|
||||
}
|
||||
|
||||
void *map = cs->current;
|
||||
agx_push(map, CDM_STREAM_TERMINATE, _)
|
||||
;
|
||||
|
|
@ -588,7 +606,7 @@ hk_cmd_buffer_end_compute_internal(struct hk_cs **ptr)
|
|||
static void
|
||||
hk_cmd_buffer_end_compute(struct hk_cmd_buffer *cmd)
|
||||
{
|
||||
hk_cmd_buffer_end_compute_internal(&cmd->current_cs.cs);
|
||||
hk_cmd_buffer_end_compute_internal(cmd, &cmd->current_cs.cs);
|
||||
}
|
||||
|
||||
static void
|
||||
|
|
@ -615,8 +633,8 @@ hk_cmd_buffer_end_graphics(struct hk_cmd_buffer *cmd)
|
|||
|
||||
cmd->current_cs.gfx->current = map;
|
||||
cmd->current_cs.gfx = NULL;
|
||||
hk_cmd_buffer_end_compute_internal(&cmd->current_cs.pre_gfx);
|
||||
hk_cmd_buffer_end_compute_internal(&cmd->current_cs.post_gfx);
|
||||
hk_cmd_buffer_end_compute_internal(cmd, &cmd->current_cs.pre_gfx);
|
||||
hk_cmd_buffer_end_compute_internal(cmd, &cmd->current_cs.post_gfx);
|
||||
}
|
||||
|
||||
assert(cmd->current_cs.gfx == NULL);
|
||||
|
|
|
|||
|
|
@ -752,7 +752,7 @@ hk_CmdBeginRendering(VkCommandBuffer commandBuffer,
|
|||
cs->cr.zls_control = render->cr.zls_control;
|
||||
|
||||
/* Reordering barrier for post-gfx, in case we had any. */
|
||||
hk_cmd_buffer_end_compute_internal(&cmd->current_cs.post_gfx);
|
||||
hk_cmd_buffer_end_compute_internal(cmd, &cmd->current_cs.post_gfx);
|
||||
|
||||
/* Don't reorder compute across render passes.
|
||||
*
|
||||
|
|
|
|||
|
|
@ -28,6 +28,7 @@
|
|||
#include "compiler/nir/nir_builder.h"
|
||||
|
||||
#include "util/os_time.h"
|
||||
#include "util/u_dynarray.h"
|
||||
#include "vulkan/vulkan_core.h"
|
||||
|
||||
struct hk_query_report {
|
||||
|
|
@ -209,6 +210,45 @@ hk_nir_write_u32(nir_builder *b, UNUSED const void *key)
|
|||
nir_store_global(b, addr, 4, value, nir_component_mask(1));
|
||||
}
|
||||
|
||||
static void
|
||||
hk_nir_write_u32s(nir_builder *b, const void *data)
|
||||
{
|
||||
nir_def *params = nir_load_preamble(b, 1, 64, .base = 0);
|
||||
nir_def *id = nir_channel(b, nir_load_global_invocation_id(b, 32), 0);
|
||||
|
||||
libagx_write_u32s(b, params, id);
|
||||
}
|
||||
|
||||
void
|
||||
hk_dispatch_imm_writes(struct hk_cmd_buffer *cmd, struct hk_cs *cs)
|
||||
{
|
||||
hk_ensure_cs_has_space(cmd, cs, 0x2000 /* TODO */);
|
||||
|
||||
/* As soon as we mark a query available, it needs to be available system
|
||||
* wide, otherwise a CPU-side get result can query. As such, we cache flush
|
||||
* before and then let coherency works its magic. Without this barrier, we
|
||||
* get flakes in
|
||||
*
|
||||
* dEQP-VK.query_pool.occlusion_query.get_results_conservative_size_64_wait_query_without_availability_draw_triangles_discard
|
||||
*/
|
||||
struct hk_device *dev = hk_cmd_buffer_device(cmd);
|
||||
hk_cdm_cache_flush(dev, cs);
|
||||
|
||||
perf_debug(dev, "Queued writes");
|
||||
|
||||
struct hk_shader *s = hk_meta_kernel(dev, hk_nir_write_u32s, NULL, 0);
|
||||
uint64_t params =
|
||||
hk_pool_upload(cmd, cs->imm_writes.data, cs->imm_writes.size, 16);
|
||||
uint32_t usc = hk_upload_usc_words_kernel(cmd, s, ¶ms, sizeof(params));
|
||||
|
||||
uint32_t count =
|
||||
util_dynarray_num_elements(&cs->imm_writes, struct libagx_imm_write);
|
||||
assert(count > 0);
|
||||
|
||||
hk_dispatch_with_usc(dev, cs, s, usc, hk_grid(count, 1, 1),
|
||||
hk_grid(32, 1, 1));
|
||||
}
|
||||
|
||||
void
|
||||
hk_queue_write(struct hk_cmd_buffer *cmd, uint64_t address, uint32_t value,
|
||||
bool after_gfx)
|
||||
|
|
@ -218,6 +258,18 @@ hk_queue_write(struct hk_cmd_buffer *cmd, uint64_t address, uint32_t value,
|
|||
if (!cs)
|
||||
return;
|
||||
|
||||
/* TODO: Generalize this mechanism suitably */
|
||||
if (after_gfx) {
|
||||
struct libagx_imm_write imm = {.address = address, .value = value};
|
||||
|
||||
if (!cs->imm_writes.data) {
|
||||
util_dynarray_init(&cs->imm_writes, NULL);
|
||||
}
|
||||
|
||||
util_dynarray_append(&cs->imm_writes, struct libagx_imm_write, imm);
|
||||
return;
|
||||
}
|
||||
|
||||
hk_ensure_cs_has_space(cmd, cs, 0x2000 /* TODO */);
|
||||
|
||||
/* As soon as we mark a query available, it needs to be available system
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue