From dc111c128c061c08074c9d4f1d4839a31f429a4b Mon Sep 17 00:00:00 2001 From: Alyssa Rosenzweig Date: Wed, 15 Jan 2025 12:59:49 -0500 Subject: [PATCH] hk: pass cmdbuf, not control stream, into precomp dispatch Signed-off-by: Alyssa Rosenzweig Part-of: --- src/asahi/libagx/libagx_dgc.h | 6 +++ src/asahi/vulkan/hk_cmd_buffer.c | 16 ++++-- src/asahi/vulkan/hk_cmd_buffer.h | 2 +- src/asahi/vulkan/hk_cmd_dispatch.c | 2 +- src/asahi/vulkan/hk_cmd_draw.c | 84 +++++++++++++----------------- src/asahi/vulkan/hk_query_pool.c | 20 +++---- 6 files changed, 63 insertions(+), 67 deletions(-) diff --git a/src/asahi/libagx/libagx_dgc.h b/src/asahi/libagx/libagx_dgc.h index a8831cee798..09ff6e0ec6e 100644 --- a/src/asahi/libagx/libagx_dgc.h +++ b/src/asahi/libagx/libagx_dgc.h @@ -106,6 +106,12 @@ enum agx_barrier { * have.... */ AGX_BARRIER_ALL = (1 << 0), + + /* Specifies ordering requirements for the compute job to happen before/after + * the concurrent enqueued graphics on a graphical command buffer. + */ + AGX_PREGFX = (1 << 1), + AGX_POSTGFX = (1 << 2), }; struct agx_draw { diff --git a/src/asahi/vulkan/hk_cmd_buffer.c b/src/asahi/vulkan/hk_cmd_buffer.c index 0d5862a48f4..789acf140ba 100644 --- a/src/asahi/vulkan/hk_cmd_buffer.c +++ b/src/asahi/vulkan/hk_cmd_buffer.c @@ -708,15 +708,23 @@ hk_upload_usc_words(struct hk_cmd_buffer *cmd, struct hk_shader *s, } void -hk_dispatch_precomp(struct hk_cs *cs, struct agx_grid grid, +hk_dispatch_precomp(struct hk_cmd_buffer *cmd, struct agx_grid grid, enum agx_barrier barrier, enum libagx_program idx, void *data, size_t data_size) { - struct hk_device *dev = hk_cmd_buffer_device(cs->cmd); + struct hk_device *dev = hk_cmd_buffer_device(cmd); struct agx_precompiled_shader *prog = agx_get_precompiled(&dev->bg_eot, idx); - struct agx_ptr t = hk_pool_usc_alloc(cs->cmd, agx_usc_size(15), 64); - uint64_t uploaded_data = hk_pool_upload(cs->cmd, data, data_size, 4); + struct hk_cs **target = (barrier & AGX_POSTGFX) ? 
&cmd->current_cs.post_gfx + : (barrier & AGX_PREGFX) ? &cmd->current_cs.pre_gfx + : &cmd->current_cs.cs; + + struct hk_cs *cs = hk_cmd_buffer_get_cs_general(cmd, target, true); + if (!cs) + return; + + struct agx_ptr t = hk_pool_usc_alloc(cmd, agx_usc_size(15), 64); + uint64_t uploaded_data = hk_pool_upload(cmd, data, data_size, 4); agx_usc_words_precomp(t.cpu, &prog->b, uploaded_data, data_size); diff --git a/src/asahi/vulkan/hk_cmd_buffer.h b/src/asahi/vulkan/hk_cmd_buffer.h index 87f0298695f..5fe31e8df00 100644 --- a/src/asahi/vulkan/hk_cmd_buffer.h +++ b/src/asahi/vulkan/hk_cmd_buffer.h @@ -803,7 +803,7 @@ hk_dispatch_with_local_size(struct hk_cmd_buffer *cmd, struct hk_cs *cs, hk_dispatch_with_usc(dev, cs, &s->b.info, usc, grid, local_size); } -void hk_dispatch_precomp(struct hk_cs *cs, struct agx_grid grid, +void hk_dispatch_precomp(struct hk_cmd_buffer *cmd, struct agx_grid grid, enum agx_barrier barrier, enum libagx_program idx, void *data, size_t data_size); diff --git a/src/asahi/vulkan/hk_cmd_dispatch.c b/src/asahi/vulkan/hk_cmd_dispatch.c index 8a7b599fb3a..47182809b5d 100644 --- a/src/asahi/vulkan/hk_cmd_dispatch.c +++ b/src/asahi/vulkan/hk_cmd_dispatch.c @@ -103,7 +103,7 @@ dispatch(struct hk_cmd_buffer *cmd, struct agx_grid grid) perf_debug(dev, "CS invocation statistic"); uint64_t grid = cmd->state.cs.descriptors.root.cs.group_count_addr; - libagx_increment_cs_invocations(cs, agx_1d(1), AGX_BARRIER_ALL, grid, + libagx_increment_cs_invocations(cmd, agx_1d(1), AGX_BARRIER_ALL, grid, stat, agx_workgroup_threads(local_size)); } diff --git a/src/asahi/vulkan/hk_cmd_draw.c b/src/asahi/vulkan/hk_cmd_draw.c index d1b7dad1e40..28d77079c76 100644 --- a/src/asahi/vulkan/hk_cmd_draw.c +++ b/src/asahi/vulkan/hk_cmd_draw.c @@ -844,11 +844,6 @@ hk_CmdBeginRendering(VkCommandBuffer commandBuffer, struct hk_device *dev = hk_cmd_buffer_device(cmd); perf_debug(dev, "Decompressing in-place"); - struct hk_cs *cs = hk_cmd_buffer_get_cs_general( - cmd, 
&cmd->current_cs.pre_gfx, true); - if (!cs) - return; - unsigned level = view->vk.base_mip_level; unsigned layer = view->vk.base_array_layer; uint64_t base = hk_image_base_address(image, image_plane); @@ -862,8 +857,8 @@ hk_CmdBeginRendering(VkCommandBuffer commandBuffer, agx_3d(ail_metadata_width_tl(layout, level) * 32, ail_metadata_height_tl(layout, level), layer_count); - libagx_decompress(cs, grid, AGX_BARRIER_ALL, layout, layer, - level, base, + libagx_decompress(cmd, grid, AGX_BARRIER_ALL | AGX_PREGFX, + layout, layer, level, base, hk_pool_upload(cmd, &imgs, sizeof(imgs), 64)); } } @@ -1359,8 +1354,8 @@ hk_draw_as_indexed_indirect(struct hk_cmd_buffer *cmd, struct agx_draw draw) } static struct agx_draw -hk_draw_without_restart(struct hk_cmd_buffer *cmd, struct hk_cs *cs, - struct agx_draw draw, uint32_t draw_count) +hk_draw_without_restart(struct hk_cmd_buffer *cmd, struct agx_draw draw, + uint32_t draw_count) { struct hk_device *dev = hk_cmd_buffer_device(cmd); struct hk_graphics_state *gfx = &cmd->state.gfx; @@ -1388,8 +1383,9 @@ hk_draw_without_restart(struct hk_cmd_buffer *cmd, struct hk_cs *cs, dyn->rs.provoking_vertex == VK_PROVOKING_VERTEX_MODE_FIRST_VERTEX_EXT, }; - libagx_unroll_restart_struct(cs, agx_1d(1024 * draw_count), AGX_BARRIER_ALL, - ia, draw.index_size, libagx_compact_prim(prim)); + libagx_unroll_restart_struct(cmd, agx_1d(1024 * draw_count), + AGX_BARRIER_ALL | AGX_PREGFX, ia, + draw.index_size, libagx_compact_prim(prim)); return agx_draw_indexed_indirect(ia.out_draw, dev->heap->va->addr, dev->heap->size, draw.index_size, @@ -1427,7 +1423,7 @@ hk_launch_gs_prerast(struct hk_cmd_buffer *cmd, struct hk_cs *cs, enum mesa_prim mode = hk_gs_in_prim(cmd); if (draw.restart) { - draw = hk_draw_without_restart(cmd, cs, draw, 1); + draw = hk_draw_without_restart(cmd, draw, 1); mode = u_decomposed_prim(mode); } @@ -1456,7 +1452,8 @@ hk_launch_gs_prerast(struct hk_cmd_buffer *cmd, struct hk_cs *cs, gsi.index_buffer_range_el = 
agx_draw_index_range_el(draw); } - libagx_gs_setup_indirect_struct(cs, agx_1d(1), AGX_BARRIER_ALL, gsi); + libagx_gs_setup_indirect_struct(cmd, agx_1d(1), + AGX_BARRIER_ALL | AGX_PREGFX, gsi); grid_vs = agx_grid_indirect( geometry_params + offsetof(struct agx_geometry_params, vs_grid)); @@ -1482,8 +1479,8 @@ hk_launch_gs_prerast(struct hk_cmd_buffer *cmd, struct hk_cs *cs, hk_dispatch_with_local_size(cmd, cs, count, grid_gs, agx_workgroup(1, 1, 1)); - libagx_prefix_sum_geom(cs, agx_1d(1024 * count_words), AGX_BARRIER_ALL, - geometry_params); + libagx_prefix_sum_geom(cmd, agx_1d(1024 * count_words), + AGX_BARRIER_ALL | AGX_PREGFX, geometry_params); } /* Pre-GS shader */ @@ -1544,7 +1541,8 @@ hk_launch_tess(struct hk_cmd_buffer *cmd, struct hk_cs *cs, args.in_index_buffer_range_el = agx_draw_index_range_el(draw); } - libagx_tess_setup_indirect_struct(cs, agx_1d(1), AGX_BARRIER_ALL, args); + libagx_tess_setup_indirect_struct(cmd, agx_1d(1), + AGX_BARRIER_ALL | AGX_PREGFX, args); uint32_t grid_stride = sizeof(uint32_t) * 6; grid_vs = agx_grid_indirect_local(gfx->tess.grids + 0 * grid_stride); @@ -1560,8 +1558,8 @@ hk_launch_tess(struct hk_cmd_buffer *cmd, struct hk_cs *cs, /* TCS invocation counter increments once per-patch */ if (tcs_stat) { perf_debug(dev, "Direct TCS statistic"); - libagx_increment_statistic(cs, agx_1d(1), AGX_BARRIER_ALL, tcs_stat, - patches); + libagx_increment_statistic( + cmd, agx_1d(1), AGX_BARRIER_ALL | AGX_PREGFX, tcs_stat, patches); } } @@ -1579,12 +1577,13 @@ hk_launch_tess(struct hk_cmd_buffer *cmd, struct hk_cs *cs, grid_tcs, agx_workgroup(tcs->info.tess.tcs_output_patch_size, 1, 1)); /* First generate counts, then prefix sum them, and then tessellate. 
*/ - libagx_tessellate(cs, grid_tess, AGX_BARRIER_ALL, info.mode, + libagx_tessellate(cmd, grid_tess, AGX_BARRIER_ALL | AGX_PREGFX, info.mode, LIBAGX_TESS_MODE_COUNT, state); - libagx_prefix_sum_tess(cs, agx_1d(1024), AGX_BARRIER_ALL, state); + libagx_prefix_sum_tess(cmd, agx_1d(1024), AGX_BARRIER_ALL | AGX_PREGFX, + state); - libagx_tessellate(cs, grid_tess, AGX_BARRIER_ALL, info.mode, + libagx_tessellate(cmd, grid_tess, AGX_BARRIER_ALL | AGX_PREGFX, info.mode, LIBAGX_TESS_MODE_WITH_COUNTS, state); return agx_draw_indexed_indirect(gfx->tess.out_draws, dev->heap->va->addr, @@ -3316,7 +3315,7 @@ hk_set_view_index(struct hk_cmd_buffer *cmd, uint32_t view_idx) if (hk_set_view_index(cmd, view_idx)) static void -hk_ia_update(struct hk_cmd_buffer *cmd, struct hk_cs *cs, struct agx_draw draw, +hk_ia_update(struct hk_cmd_buffer *cmd, struct agx_draw draw, uint64_t ia_vertices, uint64_t ia_prims, uint64_t vs_invocations, uint64_t c_prims, uint64_t c_inv) { @@ -3352,13 +3351,14 @@ hk_ia_update(struct hk_cmd_buffer *cmd, struct hk_cs *cs, struct agx_draw draw, uint32_t index_size_B = agx_index_size_to_B(draw.index_size); libagx_increment_ia_restart( - cs, agx_1d(1024), AGX_BARRIER_ALL, ia_vertices, ia_prims, + cmd, agx_1d(1024), AGX_BARRIER_ALL | AGX_PREGFX, ia_vertices, ia_prims, vs_invocations, c_prims, c_inv, draw_ptr, draw.index_buffer, agx_draw_index_range_el(draw), cmd->state.gfx.index.restart, index_size_B, prim); } else { - libagx_increment_ia(cs, agx_1d(1), AGX_BARRIER_ALL, ia_vertices, ia_prims, - vs_invocations, c_prims, c_inv, draw_ptr, prim); + libagx_increment_ia(cmd, agx_1d(1), AGX_BARRIER_ALL | AGX_PREGFX, + ia_vertices, ia_prims, vs_invocations, c_prims, c_inv, + draw_ptr, prim); } } @@ -3422,9 +3422,7 @@ hk_draw(struct hk_cmd_buffer *cmd, uint16_t draw_id, struct agx_draw draw_) cs->stats.calls++; - if (geom || tess || ia_stats || needs_idx_robust || - (adj && (agx_is_indirect(draw.b) || draw.restart))) { - + if (geom || tess) { ccs = 
hk_cmd_buffer_get_cs_general(cmd, &cmd->current_cs.pre_gfx, true); if (!ccs) @@ -3432,7 +3430,7 @@ hk_draw(struct hk_cmd_buffer *cmd, uint16_t draw_id, struct agx_draw draw_) } if (ia_stats) { - hk_ia_update(cmd, ccs, draw, stat_ia_verts, stat_ia_prims, stat_vs_inv, + hk_ia_update(cmd, draw, stat_ia_verts, stat_ia_prims, stat_vs_inv, stat_c_prims, stat_c_inv); } @@ -3457,7 +3455,7 @@ hk_draw(struct hk_cmd_buffer *cmd, uint16_t draw_id, struct agx_draw draw_) enum mesa_prim prim = vk_conv_topology(dyn->ia.primitive_topology); if (draw.restart) { - draw = hk_draw_without_restart(cmd, ccs, draw, 1); + draw = hk_draw_without_restart(cmd, draw, 1); prim = u_decomposed_prim(prim); } @@ -3471,8 +3469,8 @@ hk_draw(struct hk_cmd_buffer *cmd, uint16_t draw_id, struct agx_draw draw_) struct hk_descriptor_state *desc = &cmd->state.gfx.descriptors; libagx_draw_without_adj( - ccs, agx_1d(1), AGX_BARRIER_ALL, out_draw, draw.b.ptr, - desc->root.draw.input_assembly, draw.index_buffer, + cmd, agx_1d(1), AGX_BARRIER_ALL | AGX_PREGFX, out_draw, + draw.b.ptr, desc->root.draw.input_assembly, draw.index_buffer, draw.indexed ? agx_draw_index_range_el(draw) : 0, draw.indexed ? 
agx_index_size_to_B(draw.index_size) : 0, prim); @@ -3498,8 +3496,8 @@ hk_draw(struct hk_cmd_buffer *cmd, uint16_t draw_id, struct agx_draw draw_) size_t size_B = libagx_draw_robust_index_vdm_size(); uint64_t target = hk_cs_alloc_for_indirect(cs, size_B); - libagx_draw_robust_index(ccs, agx_1d(32), AGX_BARRIER_ALL, target, - hk_geometry_state(cmd), draw.b.ptr, + libagx_draw_robust_index(cmd, agx_1d(32), AGX_BARRIER_ALL | AGX_PREGFX, + target, hk_geometry_state(cmd), draw.b.ptr, draw.index_buffer, draw.index_buffer_range_B, draw.restart, topology, draw.index_size); } else { @@ -3709,11 +3707,6 @@ hk_draw_indirect_count(VkCommandBuffer commandBuffer, VkBuffer _buffer, struct hk_device *dev = hk_cmd_buffer_device(cmd); perf_debug(dev, "Draw indirect count"); - struct hk_cs *cs = - hk_cmd_buffer_get_cs_general(cmd, &cmd->current_cs.pre_gfx, true); - if (!cs) - return; - assert((stride % 4) == 0 && "aligned"); size_t out_stride = sizeof(uint32_t) * (indexed ? 5 : 4); @@ -3721,8 +3714,9 @@ hk_draw_indirect_count(VkCommandBuffer commandBuffer, VkBuffer _buffer, uint64_t in = hk_buffer_address(buffer, offset); uint64_t count_addr = hk_buffer_address(count_buffer, countBufferOffset); - libagx_predicate_indirect(cs, agx_1d(maxDrawCount), AGX_BARRIER_ALL, patched, - in, count_addr, stride / 4, indexed); + libagx_predicate_indirect(cmd, agx_1d(maxDrawCount), + AGX_BARRIER_ALL | AGX_PREGFX, patched, in, + count_addr, stride / 4, indexed); if (indexed) { hk_draw_indexed_indirect_inner(commandBuffer, patched, maxDrawCount, @@ -3799,11 +3793,6 @@ hk_begin_end_xfb(VkCommandBuffer commandBuffer, uint32_t firstCounterBuffer, gfx->xfb_offsets = hk_pool_alloc(cmd, 4 * sizeof(uint32_t), 4).gpu; } - struct hk_cs *cs = - hk_cmd_buffer_get_cs_general(cmd, &cmd->current_cs.pre_gfx, true); - if (!cs) - return; - struct libagx_xfb_counter_copy params = {}; unsigned copies = 0; @@ -3839,7 +3828,8 @@ hk_begin_end_xfb(VkCommandBuffer commandBuffer, uint32_t firstCounterBuffer, if (copies > 0) { 
perf_debug(dev, "XFB counter copy"); - libagx_copy_xfb_counters(cs, agx_1d(copies), AGX_BARRIER_ALL, + libagx_copy_xfb_counters(cmd, agx_1d(copies), + AGX_BARRIER_ALL | AGX_PREGFX, hk_pool_upload(cmd, &params, sizeof(params), 8)); } } diff --git a/src/asahi/vulkan/hk_query_pool.c b/src/asahi/vulkan/hk_query_pool.c index c3d4b76d38d..7e5828ac7b1 100644 --- a/src/asahi/vulkan/hk_query_pool.c +++ b/src/asahi/vulkan/hk_query_pool.c @@ -14,6 +14,7 @@ #include "hk_entrypoints.h" #include "hk_shader.h" +#include "libagx_dgc.h" #include "libagx_shaders.h" #include "vk_common_entrypoints.h" @@ -266,7 +267,7 @@ hk_dispatch_imm_writes(struct hk_cmd_buffer *cmd, struct hk_cs *cs) util_dynarray_num_elements(&cs->imm_writes, struct libagx_imm_write); assert(count > 0); - libagx_write_u32s(cs, agx_1d(count), AGX_BARRIER_ALL, params); + libagx_write_u32s(cmd, agx_1d(count), AGX_BARRIER_ALL | AGX_POSTGFX, params); } void @@ -301,7 +302,7 @@ hk_queue_write(struct hk_cmd_buffer *cmd, uint64_t address, uint32_t value, hk_cdm_cache_flush(dev, cs); perf_debug(dev, "Queued write"); - libagx_write_u32(cs, agx_1d(1), AGX_BARRIER_ALL, address, value); + libagx_write_u32(cmd, agx_1d(1), AGX_BARRIER_ALL, address, value); } /** @@ -424,13 +425,8 @@ hk_CmdWriteTimestamp2(VkCommandBuffer commandBuffer, if (cs->timestamp.end.addr) { assert(after_gfx && "compute is handled above"); - struct hk_cs *after = - hk_cmd_buffer_get_cs_general(cmd, &cmd->current_cs.post_gfx, true); - if (!after) - return; - - libagx_copy_timestamp(after, agx_1d(1), AGX_BARRIER_ALL, report_addr, - cs->timestamp.end.addr); + libagx_copy_timestamp(cmd, agx_1d(1), AGX_BARRIER_ALL | AGX_POSTGFX, + report_addr, cs->timestamp.end.addr); } else { cs->timestamp.end = (struct agx_timestamp_req){ .addr = report_addr, @@ -675,10 +671,6 @@ hk_CmdCopyQueryPoolResults(VkCommandBuffer commandBuffer, VkQueryPool queryPool, struct hk_device *dev = hk_cmd_buffer_device(cmd); hk_flush_if_timestamp(cmd, pool); - struct hk_cs *cs =
hk_cmd_buffer_get_cs(cmd, true); - if (!cs) - return; - perf_debug(dev, "Query pool copy"); struct libagx_copy_query_args info = { @@ -697,5 +689,5 @@ hk_CmdCopyQueryPoolResults(VkCommandBuffer commandBuffer, VkQueryPool queryPool, .with_availability = flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT, }; - libagx_copy_query_struct(cs, agx_1d(queryCount), AGX_BARRIER_ALL, info); + libagx_copy_query_struct(cmd, agx_1d(queryCount), AGX_BARRIER_ALL, info); }